aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorvvvv <vvvv@ydb.tech>2023-07-31 18:21:04 +0300
committervvvv <vvvv@ydb.tech>2023-07-31 18:21:04 +0300
commitdec41c40e51aa407edef81a3c566a5a15780fc49 (patch)
tree4f197b596b32f35eca368121f0dff913419da9af
parent3ca8b54c96e09eb2b65be7f09675623438d559c7 (diff)
downloadydb-dec41c40e51aa407edef81a3c566a5a15780fc49.tar.gz
YQL-16239 Move purecalc to public
-rw-r--r--CMakeLists.darwin-x86_64.txt6
-rw-r--r--CMakeLists.linux-aarch64.txt6
-rw-r--r--CMakeLists.linux-x86_64.txt6
-rw-r--r--CMakeLists.windows-x86_64.txt6
-rw-r--r--contrib/libs/libc_compat/include/link/link.h15
-rw-r--r--contrib/tools/ragel5/common/buffer.h55
-rw-r--r--contrib/tools/ragel5/common/common.cpp296
-rw-r--r--contrib/tools/ragel5/common/common.h308
-rw-r--r--contrib/tools/ragel5/common/config.h39
-rw-r--r--contrib/tools/ragel5/common/pcheck.h51
-rw-r--r--contrib/tools/ragel5/common/version.h2
-rw-r--r--contrib/tools/ragel5/common/ya.make20
-rw-r--r--contrib/tools/ragel5/ragel/fsmap.cpp840
-rw-r--r--contrib/tools/ragel5/ragel/fsmattach.cpp425
-rw-r--r--contrib/tools/ragel5/ragel/fsmbase.cpp598
-rw-r--r--contrib/tools/ragel5/ragel/fsmgraph.cpp1426
-rw-r--r--contrib/tools/ragel5/ragel/fsmgraph.h1482
-rw-r--r--contrib/tools/ragel5/ragel/fsmmin.cpp732
-rw-r--r--contrib/tools/ragel5/ragel/fsmstate.cpp463
-rw-r--r--contrib/tools/ragel5/ragel/main.cpp355
-rw-r--r--contrib/tools/ragel5/ragel/parsedata.cpp1505
-rw-r--r--contrib/tools/ragel5/ragel/parsedata.h401
-rw-r--r--contrib/tools/ragel5/ragel/parsetree.cpp2089
-rw-r--r--contrib/tools/ragel5/ragel/parsetree.h755
-rw-r--r--contrib/tools/ragel5/ragel/ragel.h74
-rw-r--r--contrib/tools/ragel5/ragel/rlparse.cpp6088
-rw-r--r--contrib/tools/ragel5/ragel/rlparse.h184
-rw-r--r--contrib/tools/ragel5/ragel/rlscan.cpp4876
-rw-r--r--contrib/tools/ragel5/ragel/rlscan.h161
-rw-r--r--contrib/tools/ragel5/ragel/xmlcodegen.cpp713
-rw-r--r--contrib/tools/ragel5/ragel/xmlcodegen.h137
-rw-r--r--contrib/tools/ragel5/ragel/ya.make26
-rw-r--r--contrib/tools/ragel5/redfsm/gendata.cpp717
-rw-r--r--contrib/tools/ragel5/redfsm/gendata.h167
-rw-r--r--contrib/tools/ragel5/redfsm/phash.h10
-rw-r--r--contrib/tools/ragel5/redfsm/redfsm.cpp559
-rw-r--r--contrib/tools/ragel5/redfsm/redfsm.h534
-rw-r--r--contrib/tools/ragel5/redfsm/xmlparse.cpp3549
-rw-r--r--contrib/tools/ragel5/redfsm/xmlparse.h228
-rw-r--r--contrib/tools/ragel5/redfsm/xmlscan.cpp925
-rw-r--r--contrib/tools/ragel5/redfsm/xmltags.cpp244
-rw-r--r--contrib/tools/ragel5/redfsm/ya.make25
-rw-r--r--contrib/tools/ragel5/rlgen-cd/fflatcodegen.cpp351
-rw-r--r--contrib/tools/ragel5/rlgen-cd/fflatcodegen.h76
-rw-r--r--contrib/tools/ragel5/rlgen-cd/fgotocodegen.cpp262
-rw-r--r--contrib/tools/ragel5/rlgen-cd/fgotocodegen.h76
-rw-r--r--contrib/tools/ragel5/rlgen-cd/flatcodegen.cpp766
-rw-r--r--contrib/tools/ragel5/rlgen-cd/flatcodegen.h108
-rw-r--r--contrib/tools/ragel5/rlgen-cd/fsmcodegen.cpp749
-rw-r--r--contrib/tools/ragel5/rlgen-cd/fsmcodegen.h218
-rw-r--r--contrib/tools/ragel5/rlgen-cd/ftabcodegen.cpp405
-rw-r--r--contrib/tools/ragel5/rlgen-cd/ftabcodegen.h78
-rw-r--r--contrib/tools/ragel5/rlgen-cd/gotocodegen.cpp742
-rw-r--r--contrib/tools/ragel5/rlgen-cd/gotocodegen.h111
-rw-r--r--contrib/tools/ragel5/rlgen-cd/ipgotocodegen.cpp414
-rw-r--r--contrib/tools/ragel5/rlgen-cd/ipgotocodegen.h97
-rw-r--r--contrib/tools/ragel5/rlgen-cd/main.cpp394
-rw-r--r--contrib/tools/ragel5/rlgen-cd/rlgen-cd.h60
-rw-r--r--contrib/tools/ragel5/rlgen-cd/splitcodegen.cpp521
-rw-r--r--contrib/tools/ragel5/rlgen-cd/splitcodegen.h71
-rw-r--r--contrib/tools/ragel5/rlgen-cd/tabcodegen.cpp988
-rw-r--r--contrib/tools/ragel5/rlgen-cd/tabcodegen.h115
-rw-r--r--contrib/tools/ragel5/rlgen-cd/ya.make25
-rw-r--r--geobase/CMakeLists.txt10
-rw-r--r--geobase/library/CMakeLists.darwin-x86_64.txt22
-rw-r--r--geobase/library/CMakeLists.linux-aarch64.txt23
-rw-r--r--geobase/library/CMakeLists.linux-x86_64.txt23
-rw-r--r--geobase/library/CMakeLists.txt17
-rw-r--r--geobase/library/CMakeLists.windows-x86_64.txt22
-rw-r--r--geobase/library/abi/CMakeLists.darwin-x86_64.txt56
-rw-r--r--geobase/library/abi/CMakeLists.linux-aarch64.txt57
-rw-r--r--geobase/library/abi/CMakeLists.linux-x86_64.txt57
-rw-r--r--geobase/library/abi/CMakeLists.txt17
-rw-r--r--geobase/library/abi/CMakeLists.windows-x86_64.txt56
-rw-r--r--geobase/library/api/CMakeLists.darwin-x86_64.txt25
-rw-r--r--geobase/library/api/CMakeLists.linux-aarch64.txt26
-rw-r--r--geobase/library/api/CMakeLists.linux-x86_64.txt26
-rw-r--r--geobase/library/api/CMakeLists.txt17
-rw-r--r--geobase/library/api/CMakeLists.windows-x86_64.txt25
-rw-r--r--geobase/library/city_id_calc/CMakeLists.darwin-x86_64.txt20
-rw-r--r--geobase/library/city_id_calc/CMakeLists.linux-aarch64.txt21
-rw-r--r--geobase/library/city_id_calc/CMakeLists.linux-x86_64.txt21
-rw-r--r--geobase/library/city_id_calc/CMakeLists.txt17
-rw-r--r--geobase/library/city_id_calc/CMakeLists.windows-x86_64.txt20
-rw-r--r--geobase/library/db/CMakeLists.txt9
-rw-r--r--geobase/library/db/stub/CMakeLists.darwin-x86_64.txt21
-rw-r--r--geobase/library/db/stub/CMakeLists.linux-aarch64.txt22
-rw-r--r--geobase/library/db/stub/CMakeLists.linux-x86_64.txt22
-rw-r--r--geobase/library/db/stub/CMakeLists.txt17
-rw-r--r--geobase/library/db/stub/CMakeLists.windows-x86_64.txt21
-rw-r--r--geobase/library/dispute_regs/CMakeLists.darwin-x86_64.txt24
-rw-r--r--geobase/library/dispute_regs/CMakeLists.linux-aarch64.txt25
-rw-r--r--geobase/library/dispute_regs/CMakeLists.linux-x86_64.txt25
-rw-r--r--geobase/library/dispute_regs/CMakeLists.txt17
-rw-r--r--geobase/library/dispute_regs/CMakeLists.windows-x86_64.txt24
-rw-r--r--geobase/library/dispute_regs/proto/CMakeLists.darwin-x86_64.txt43
-rw-r--r--geobase/library/dispute_regs/proto/CMakeLists.linux-aarch64.txt44
-rw-r--r--geobase/library/dispute_regs/proto/CMakeLists.linux-x86_64.txt44
-rw-r--r--geobase/library/dispute_regs/proto/CMakeLists.txt17
-rw-r--r--geobase/library/dispute_regs/proto/CMakeLists.windows-x86_64.txt43
-rw-r--r--geobase/library/dispute_regs/resource/CMakeLists.darwin-x86_64.txt38
-rw-r--r--geobase/library/dispute_regs/resource/CMakeLists.linux-aarch64.txt40
-rw-r--r--geobase/library/dispute_regs/resource/CMakeLists.linux-x86_64.txt40
-rw-r--r--geobase/library/dispute_regs/resource/CMakeLists.txt17
-rw-r--r--geobase/library/dispute_regs/resource/CMakeLists.windows-x86_64.txt38
-rw-r--r--geobase/library/utils/CMakeLists.darwin-x86_64.txt23
-rw-r--r--geobase/library/utils/CMakeLists.linux-aarch64.txt24
-rw-r--r--geobase/library/utils/CMakeLists.linux-x86_64.txt24
-rw-r--r--geobase/library/utils/CMakeLists.txt17
-rw-r--r--geobase/library/utils/CMakeLists.windows-x86_64.txt23
-rw-r--r--geobase/user-settings/CMakeLists.darwin-x86_64.txt31
-rw-r--r--geobase/user-settings/CMakeLists.linux-aarch64.txt32
-rw-r--r--geobase/user-settings/CMakeLists.linux-x86_64.txt32
-rw-r--r--geobase/user-settings/CMakeLists.txt17
-rw-r--r--geobase/user-settings/CMakeLists.windows-x86_64.txt31
-rw-r--r--kernel/CMakeLists.txt16
-rw-r--r--kernel/blogs/CMakeLists.txt9
-rw-r--r--kernel/blogs/protos/CMakeLists.darwin-x86_64.txt56
-rw-r--r--kernel/blogs/protos/CMakeLists.linux-aarch64.txt57
-rw-r--r--kernel/blogs/protos/CMakeLists.linux-x86_64.txt57
-rw-r--r--kernel/blogs/protos/CMakeLists.txt17
-rw-r--r--kernel/blogs/protos/CMakeLists.windows-x86_64.txt56
-rw-r--r--kernel/hosts/CMakeLists.txt9
-rw-r--r--kernel/hosts/owner/CMakeLists.darwin-x86_64.txt53
-rw-r--r--kernel/hosts/owner/CMakeLists.linux-aarch64.txt54
-rw-r--r--kernel/hosts/owner/CMakeLists.linux-x86_64.txt54
-rw-r--r--kernel/hosts/owner/CMakeLists.txt17
-rw-r--r--kernel/hosts/owner/CMakeLists.windows-x86_64.txt53
-rw-r--r--kernel/indexann/CMakeLists.txt9
-rw-r--r--kernel/indexann/protos/CMakeLists.darwin-x86_64.txt57
-rw-r--r--kernel/indexann/protos/CMakeLists.linux-aarch64.txt58
-rw-r--r--kernel/indexann/protos/CMakeLists.linux-x86_64.txt58
-rw-r--r--kernel/indexann/protos/CMakeLists.txt17
-rw-r--r--kernel/indexann/protos/CMakeLists.windows-x86_64.txt57
-rw-r--r--kernel/langregion/CMakeLists.darwin-x86_64.txt18
-rw-r--r--kernel/langregion/CMakeLists.linux-aarch64.txt19
-rw-r--r--kernel/langregion/CMakeLists.linux-x86_64.txt19
-rw-r--r--kernel/langregion/CMakeLists.txt17
-rw-r--r--kernel/langregion/CMakeLists.windows-x86_64.txt18
-rw-r--r--kernel/mango/CMakeLists.txt9
-rw-r--r--kernel/mango/proto/CMakeLists.darwin-x86_64.txt176
-rw-r--r--kernel/mango/proto/CMakeLists.linux-aarch64.txt177
-rw-r--r--kernel/mango/proto/CMakeLists.linux-x86_64.txt177
-rw-r--r--kernel/mango/proto/CMakeLists.txt17
-rw-r--r--kernel/mango/proto/CMakeLists.windows-x86_64.txt176
-rw-r--r--kernel/multilanguage_hosts/CMakeLists.darwin-x86_64.txt20
-rw-r--r--kernel/multilanguage_hosts/CMakeLists.linux-aarch64.txt21
-rw-r--r--kernel/multilanguage_hosts/CMakeLists.linux-x86_64.txt21
-rw-r--r--kernel/multilanguage_hosts/CMakeLists.txt17
-rw-r--r--kernel/multilanguage_hosts/CMakeLists.windows-x86_64.txt20
-rw-r--r--kernel/search_zone/CMakeLists.txt9
-rw-r--r--kernel/search_zone/protos/CMakeLists.darwin-x86_64.txt45
-rw-r--r--kernel/search_zone/protos/CMakeLists.linux-aarch64.txt46
-rw-r--r--kernel/search_zone/protos/CMakeLists.linux-x86_64.txt46
-rw-r--r--kernel/search_zone/protos/CMakeLists.txt17
-rw-r--r--kernel/search_zone/protos/CMakeLists.windows-x86_64.txt45
-rw-r--r--kernel/urlnorm/CMakeLists.darwin-x86_64.txt39
-rw-r--r--kernel/urlnorm/CMakeLists.linux-aarch64.txt40
-rw-r--r--kernel/urlnorm/CMakeLists.linux-x86_64.txt40
-rw-r--r--kernel/urlnorm/CMakeLists.txt17
-rw-r--r--kernel/urlnorm/CMakeLists.windows-x86_64.txt39
-rw-r--r--library/cpp/CMakeLists.darwin-x86_64.txt9
-rw-r--r--library/cpp/CMakeLists.linux-aarch64.txt9
-rw-r--r--library/cpp/CMakeLists.linux-x86_64.txt9
-rw-r--r--library/cpp/CMakeLists.windows-x86_64.txt9
-rw-r--r--library/cpp/containers/CMakeLists.txt1
-rw-r--r--library/cpp/containers/str_hash/CMakeLists.darwin-x86_64.txt19
-rw-r--r--library/cpp/containers/str_hash/CMakeLists.linux-aarch64.txt20
-rw-r--r--library/cpp/containers/str_hash/CMakeLists.linux-x86_64.txt20
-rw-r--r--library/cpp/containers/str_hash/CMakeLists.txt17
-rw-r--r--library/cpp/containers/str_hash/CMakeLists.windows-x86_64.txt19
-rw-r--r--library/cpp/containers/str_hash/str_hash.cpp60
-rw-r--r--library/cpp/containers/str_hash/str_hash.h181
-rw-r--r--library/cpp/containers/str_hash/ya.make12
-rw-r--r--library/cpp/deprecated/CMakeLists.txt4
-rw-r--r--library/cpp/deprecated/autoarray/CMakeLists.darwin-x86_64.txt17
-rw-r--r--library/cpp/deprecated/autoarray/CMakeLists.linux-aarch64.txt18
-rw-r--r--library/cpp/deprecated/autoarray/CMakeLists.linux-x86_64.txt18
-rw-r--r--library/cpp/deprecated/autoarray/CMakeLists.txt17
-rw-r--r--library/cpp/deprecated/autoarray/CMakeLists.windows-x86_64.txt17
-rw-r--r--library/cpp/deprecated/autoarray/README.md3
-rw-r--r--library/cpp/deprecated/autoarray/autoarray.cpp1
-rw-r--r--library/cpp/deprecated/autoarray/autoarray.h264
-rw-r--r--library/cpp/deprecated/autoarray/ya.make7
-rw-r--r--library/cpp/deprecated/datafile/CMakeLists.darwin-x86_64.txt19
-rw-r--r--library/cpp/deprecated/datafile/CMakeLists.linux-aarch64.txt20
-rw-r--r--library/cpp/deprecated/datafile/CMakeLists.linux-x86_64.txt20
-rw-r--r--library/cpp/deprecated/datafile/CMakeLists.txt17
-rw-r--r--library/cpp/deprecated/datafile/CMakeLists.windows-x86_64.txt19
-rw-r--r--library/cpp/deprecated/datafile/README.md3
-rw-r--r--library/cpp/deprecated/datafile/datafile.cpp42
-rw-r--r--library/cpp/deprecated/datafile/datafile.h88
-rw-r--r--library/cpp/deprecated/datafile/loadmode.cpp1
-rw-r--r--library/cpp/deprecated/datafile/loadmode.h20
-rw-r--r--library/cpp/deprecated/datafile/ya.make12
-rw-r--r--library/cpp/deprecated/fgood/CMakeLists.darwin-x86_64.txt18
-rw-r--r--library/cpp/deprecated/fgood/CMakeLists.linux-aarch64.txt19
-rw-r--r--library/cpp/deprecated/fgood/CMakeLists.linux-x86_64.txt19
-rw-r--r--library/cpp/deprecated/fgood/CMakeLists.txt17
-rw-r--r--library/cpp/deprecated/fgood/CMakeLists.windows-x86_64.txt18
-rw-r--r--library/cpp/deprecated/fgood/README.md15
-rw-r--r--library/cpp/deprecated/fgood/ffb.cpp407
-rw-r--r--library/cpp/deprecated/fgood/ffb.h264
-rw-r--r--library/cpp/deprecated/fgood/fgood.cpp70
-rw-r--r--library/cpp/deprecated/fgood/fgood.h328
-rw-r--r--library/cpp/deprecated/fgood/fput.h79
-rw-r--r--library/cpp/deprecated/fgood/ya.make8
-rw-r--r--library/cpp/deprecated/mapped_file/CMakeLists.darwin-x86_64.txt17
-rw-r--r--library/cpp/deprecated/mapped_file/CMakeLists.linux-aarch64.txt18
-rw-r--r--library/cpp/deprecated/mapped_file/CMakeLists.linux-x86_64.txt18
-rw-r--r--library/cpp/deprecated/mapped_file/CMakeLists.txt17
-rw-r--r--library/cpp/deprecated/mapped_file/CMakeLists.windows-x86_64.txt17
-rw-r--r--library/cpp/deprecated/mapped_file/mapped_file.cpp64
-rw-r--r--library/cpp/deprecated/mapped_file/ya.make7
-rw-r--r--library/cpp/geo/CMakeLists.darwin-x86_64.txt24
-rw-r--r--library/cpp/geo/CMakeLists.linux-aarch64.txt25
-rw-r--r--library/cpp/geo/CMakeLists.linux-x86_64.txt25
-rw-r--r--library/cpp/geo/CMakeLists.txt17
-rw-r--r--library/cpp/geo/CMakeLists.windows-x86_64.txt24
-rw-r--r--library/cpp/geo/bbox.cpp1
-rw-r--r--library/cpp/geo/bbox.h59
-rw-r--r--library/cpp/geo/geo.cpp1
-rw-r--r--library/cpp/geo/geo.h8
-rw-r--r--library/cpp/geo/load_save_helper.cpp49
-rw-r--r--library/cpp/geo/load_save_helper.h23
-rw-r--r--library/cpp/geo/point.cpp146
-rw-r--r--library/cpp/geo/point.h198
-rw-r--r--library/cpp/geo/polygon.cpp28
-rw-r--r--library/cpp/geo/polygon.h90
-rw-r--r--library/cpp/geo/size.cpp31
-rw-r--r--library/cpp/geo/size.h93
-rw-r--r--library/cpp/geo/style/ya.make8
-rw-r--r--library/cpp/geo/ut/load_save_helper_ut.cpp90
-rw-r--r--library/cpp/geo/ut/point_ut.cpp171
-rw-r--r--library/cpp/geo/ut/polygon_ut.cpp34
-rw-r--r--library/cpp/geo/ut/size_ut.cpp29
-rw-r--r--library/cpp/geo/ut/util_ut.cpp36
-rw-r--r--library/cpp/geo/ut/window_ut.cpp547
-rw-r--r--library/cpp/geo/ut/ya.make12
-rw-r--r--library/cpp/geo/util.cpp34
-rw-r--r--library/cpp/geo/util.h107
-rw-r--r--library/cpp/geo/window.cpp297
-rw-r--r--library/cpp/geo/window.h264
-rw-r--r--library/cpp/geo/ya.make19
-rw-r--r--library/cpp/geobase/CMakeLists.darwin-x86_64.txt30
-rw-r--r--library/cpp/geobase/CMakeLists.linux-aarch64.txt31
-rw-r--r--library/cpp/geobase/CMakeLists.linux-x86_64.txt31
-rw-r--r--library/cpp/geobase/CMakeLists.txt17
-rw-r--r--library/cpp/geobase/CMakeLists.windows-x86_64.txt30
-rw-r--r--library/cpp/geobase/geobase.cpp3
-rw-r--r--library/cpp/geobase/lookup.hpp44
-rw-r--r--library/cpp/geobase/service_getter.hpp7
-rw-r--r--library/cpp/geobase/timezone_getter.hpp9
-rw-r--r--library/cpp/geobase/ya.make13
-rw-r--r--library/cpp/geohash/CMakeLists.darwin-x86_64.txt32
-rw-r--r--library/cpp/geohash/CMakeLists.linux-aarch64.txt33
-rw-r--r--library/cpp/geohash/CMakeLists.linux-x86_64.txt33
-rw-r--r--library/cpp/geohash/CMakeLists.txt17
-rw-r--r--library/cpp/geohash/CMakeLists.windows-x86_64.txt32
-rw-r--r--library/cpp/geohash/direction.h14
-rw-r--r--library/cpp/geohash/geohash.cpp413
-rw-r--r--library/cpp/geohash/geohash.h123
-rw-r--r--library/cpp/geohash/ya.make13
-rw-r--r--library/cpp/ipreg/CMakeLists.darwin-x86_64.txt53
-rw-r--r--library/cpp/ipreg/CMakeLists.linux-aarch64.txt54
-rw-r--r--library/cpp/ipreg/CMakeLists.linux-x86_64.txt54
-rw-r--r--library/cpp/ipreg/CMakeLists.txt17
-rw-r--r--library/cpp/ipreg/CMakeLists.windows-x86_64.txt53
-rw-r--r--library/cpp/ipreg/address.cpp365
-rw-r--r--library/cpp/ipreg/address.h137
-rw-r--r--library/cpp/ipreg/checker.cpp47
-rw-r--r--library/cpp/ipreg/checker.h37
-rw-r--r--library/cpp/ipreg/merge.cpp69
-rw-r--r--library/cpp/ipreg/merge.h11
-rw-r--r--library/cpp/ipreg/range.cpp198
-rw-r--r--library/cpp/ipreg/range.h50
-rw-r--r--library/cpp/ipreg/reader.cpp82
-rw-r--r--library/cpp/ipreg/reader.h57
-rw-r--r--library/cpp/ipreg/sources.cpp100
-rw-r--r--library/cpp/ipreg/sources.h53
-rw-r--r--library/cpp/ipreg/split.cpp54
-rw-r--r--library/cpp/ipreg/split.h13
-rw-r--r--library/cpp/ipreg/stopwatch.cpp53
-rw-r--r--library/cpp/ipreg/stopwatch.h25
-rw-r--r--library/cpp/ipreg/util_helpers.cpp705
-rw-r--r--library/cpp/ipreg/util_helpers.h65
-rw-r--r--library/cpp/ipreg/writer.cpp91
-rw-r--r--library/cpp/ipreg/writer.h62
-rw-r--r--library/cpp/ipreg/ya.make26
-rw-r--r--library/cpp/langmask/CMakeLists.txt9
-rw-r--r--library/cpp/langmask/proto/CMakeLists.darwin-x86_64.txt43
-rw-r--r--library/cpp/langmask/proto/CMakeLists.linux-aarch64.txt44
-rw-r--r--library/cpp/langmask/proto/CMakeLists.linux-x86_64.txt44
-rw-r--r--library/cpp/langmask/proto/CMakeLists.txt17
-rw-r--r--library/cpp/langmask/proto/CMakeLists.windows-x86_64.txt43
-rw-r--r--library/cpp/langmask/proto/langmask.proto6
-rw-r--r--library/cpp/langmask/proto/ya.make11
-rw-r--r--library/cpp/microbdb/CMakeLists.darwin-x86_64.txt56
-rw-r--r--library/cpp/microbdb/CMakeLists.linux-aarch64.txt57
-rw-r--r--library/cpp/microbdb/CMakeLists.linux-x86_64.txt57
-rw-r--r--library/cpp/microbdb/CMakeLists.txt17
-rw-r--r--library/cpp/microbdb/CMakeLists.windows-x86_64.txt56
-rw-r--r--library/cpp/microbdb/align.h17
-rw-r--r--library/cpp/microbdb/compressed.h520
-rw-r--r--library/cpp/microbdb/extinfo.h127
-rw-r--r--library/cpp/microbdb/file.cpp220
-rw-r--r--library/cpp/microbdb/file.h225
-rw-r--r--library/cpp/microbdb/hashes.h250
-rw-r--r--library/cpp/microbdb/header.cpp91
-rw-r--r--library/cpp/microbdb/header.h159
-rw-r--r--library/cpp/microbdb/heap.h143
-rw-r--r--library/cpp/microbdb/input.h1027
-rw-r--r--library/cpp/microbdb/microbdb.cpp1
-rw-r--r--library/cpp/microbdb/microbdb.h54
-rw-r--r--library/cpp/microbdb/noextinfo.proto4
-rw-r--r--library/cpp/microbdb/output.h1049
-rw-r--r--library/cpp/microbdb/powersorter.h667
-rw-r--r--library/cpp/microbdb/reader.h354
-rw-r--r--library/cpp/microbdb/safeopen.h792
-rw-r--r--library/cpp/microbdb/sorter.h677
-rw-r--r--library/cpp/microbdb/sorterdef.h19
-rw-r--r--library/cpp/microbdb/utility.h75
-rw-r--r--library/cpp/microbdb/wrappers.h637
-rw-r--r--library/cpp/microbdb/ya.make36
-rw-r--r--library/cpp/on_disk/CMakeLists.txt1
-rw-r--r--library/cpp/on_disk/st_hash/CMakeLists.darwin-x86_64.txt18
-rw-r--r--library/cpp/on_disk/st_hash/CMakeLists.linux-aarch64.txt19
-rw-r--r--library/cpp/on_disk/st_hash/CMakeLists.linux-x86_64.txt19
-rw-r--r--library/cpp/on_disk/st_hash/CMakeLists.txt17
-rw-r--r--library/cpp/on_disk/st_hash/CMakeLists.windows-x86_64.txt18
-rw-r--r--library/cpp/on_disk/st_hash/fake.cpp4
-rw-r--r--library/cpp/on_disk/st_hash/save_stl.h84
-rw-r--r--library/cpp/on_disk/st_hash/static_hash.h420
-rw-r--r--library/cpp/on_disk/st_hash/static_hash_map.h59
-rw-r--r--library/cpp/on_disk/st_hash/sthash_iterators.h334
-rw-r--r--library/cpp/on_disk/st_hash/ya.make15
-rw-r--r--library/cpp/regex/CMakeLists.darwin-x86_64.txt1
-rw-r--r--library/cpp/regex/CMakeLists.linux-aarch64.txt1
-rw-r--r--library/cpp/regex/CMakeLists.linux-x86_64.txt1
-rw-r--r--library/cpp/regex/CMakeLists.windows-x86_64.txt1
-rw-r--r--library/cpp/regex/glob/CMakeLists.darwin-x86_64.txt19
-rw-r--r--library/cpp/regex/glob/CMakeLists.linux-aarch64.txt20
-rw-r--r--library/cpp/regex/glob/CMakeLists.linux-x86_64.txt20
-rw-r--r--library/cpp/regex/glob/CMakeLists.txt17
-rw-r--r--library/cpp/regex/glob/CMakeLists.windows-x86_64.txt19
-rw-r--r--library/cpp/regex/glob/glob.cpp921
-rw-r--r--library/cpp/regex/glob/glob_compat.h73
-rw-r--r--library/cpp/regex/glob/glob_iterator.cpp1
-rw-r--r--library/cpp/regex/glob/glob_iterator.h36
-rw-r--r--library/cpp/regex/glob/ya.make12
-rw-r--r--library/cpp/reverse_geocoder/CMakeLists.txt11
-rw-r--r--library/cpp/reverse_geocoder/core/CMakeLists.darwin-x86_64.txt35
-rw-r--r--library/cpp/reverse_geocoder/core/CMakeLists.linux-aarch64.txt36
-rw-r--r--library/cpp/reverse_geocoder/core/CMakeLists.linux-x86_64.txt36
-rw-r--r--library/cpp/reverse_geocoder/core/CMakeLists.txt17
-rw-r--r--library/cpp/reverse_geocoder/core/CMakeLists.windows-x86_64.txt35
-rw-r--r--library/cpp/reverse_geocoder/core/area_box.cpp9
-rw-r--r--library/cpp/reverse_geocoder/core/area_box.h34
-rw-r--r--library/cpp/reverse_geocoder/core/bbox.cpp1
-rw-r--r--library/cpp/reverse_geocoder/core/bbox.h66
-rw-r--r--library/cpp/reverse_geocoder/core/common.cpp1
-rw-r--r--library/cpp/reverse_geocoder/core/common.h24
-rw-r--r--library/cpp/reverse_geocoder/core/edge.cpp1
-rw-r--r--library/cpp/reverse_geocoder/core/edge.h101
-rw-r--r--library/cpp/reverse_geocoder/core/geo_data/debug.cpp74
-rw-r--r--library/cpp/reverse_geocoder/core/geo_data/debug.h16
-rw-r--r--library/cpp/reverse_geocoder/core/geo_data/def.cpp1
-rw-r--r--library/cpp/reverse_geocoder/core/geo_data/def.h35
-rw-r--r--library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp1
-rw-r--r--library/cpp/reverse_geocoder/core/geo_data/geo_data.h24
-rw-r--r--library/cpp/reverse_geocoder/core/geo_data/map.cpp203
-rw-r--r--library/cpp/reverse_geocoder/core/geo_data/map.h89
-rw-r--r--library/cpp/reverse_geocoder/core/geo_data/proxy.cpp1
-rw-r--r--library/cpp/reverse_geocoder/core/geo_data/proxy.h68
-rw-r--r--library/cpp/reverse_geocoder/core/kv.cpp1
-rw-r--r--library/cpp/reverse_geocoder/core/kv.h13
-rw-r--r--library/cpp/reverse_geocoder/core/location.cpp1
-rw-r--r--library/cpp/reverse_geocoder/core/location.h21
-rw-r--r--library/cpp/reverse_geocoder/core/part.cpp29
-rw-r--r--library/cpp/reverse_geocoder/core/part.h26
-rw-r--r--library/cpp/reverse_geocoder/core/point.cpp1
-rw-r--r--library/cpp/reverse_geocoder/core/point.h52
-rw-r--r--library/cpp/reverse_geocoder/core/polygon.cpp91
-rw-r--r--library/cpp/reverse_geocoder/core/polygon.h73
-rw-r--r--library/cpp/reverse_geocoder/core/region.cpp1
-rw-r--r--library/cpp/reverse_geocoder/core/region.h37
-rw-r--r--library/cpp/reverse_geocoder/core/reverse_geocoder.cpp182
-rw-r--r--library/cpp/reverse_geocoder/core/reverse_geocoder.h73
-rw-r--r--library/cpp/reverse_geocoder/core/ya.make28
-rw-r--r--library/cpp/reverse_geocoder/library/CMakeLists.darwin-x86_64.txt21
-rw-r--r--library/cpp/reverse_geocoder/library/CMakeLists.linux-aarch64.txt22
-rw-r--r--library/cpp/reverse_geocoder/library/CMakeLists.linux-x86_64.txt22
-rw-r--r--library/cpp/reverse_geocoder/library/CMakeLists.txt17
-rw-r--r--library/cpp/reverse_geocoder/library/CMakeLists.windows-x86_64.txt21
-rw-r--r--library/cpp/reverse_geocoder/library/block_allocator.cpp40
-rw-r--r--library/cpp/reverse_geocoder/library/block_allocator.h64
-rw-r--r--library/cpp/reverse_geocoder/library/fs.cpp18
-rw-r--r--library/cpp/reverse_geocoder/library/fs.h19
-rw-r--r--library/cpp/reverse_geocoder/library/log.cpp111
-rw-r--r--library/cpp/reverse_geocoder/library/log.h65
-rw-r--r--library/cpp/reverse_geocoder/library/memory.h23
-rw-r--r--library/cpp/reverse_geocoder/library/pool_allocator.cpp17
-rw-r--r--library/cpp/reverse_geocoder/library/pool_allocator.h42
-rw-r--r--library/cpp/reverse_geocoder/library/system.h3
-rw-r--r--library/cpp/reverse_geocoder/library/unaligned_iter.cpp1
-rw-r--r--library/cpp/reverse_geocoder/library/unaligned_iter.h64
-rw-r--r--library/cpp/reverse_geocoder/library/ya.make11
-rw-r--r--library/cpp/reverse_geocoder/proto/CMakeLists.darwin-x86_64.txt56
-rw-r--r--library/cpp/reverse_geocoder/proto/CMakeLists.linux-aarch64.txt57
-rw-r--r--library/cpp/reverse_geocoder/proto/CMakeLists.linux-x86_64.txt57
-rw-r--r--library/cpp/reverse_geocoder/proto/CMakeLists.txt17
-rw-r--r--library/cpp/reverse_geocoder/proto/CMakeLists.windows-x86_64.txt56
-rw-r--r--library/cpp/reverse_geocoder/proto/geo_data.proto42
-rw-r--r--library/cpp/reverse_geocoder/proto/region.proto32
-rw-r--r--library/cpp/reverse_geocoder/proto/ya.make10
-rw-r--r--library/cpp/robots_txt/CMakeLists.darwin-x86_64.txt26
-rw-r--r--library/cpp/robots_txt/CMakeLists.linux-aarch64.txt27
-rw-r--r--library/cpp/robots_txt/CMakeLists.linux-x86_64.txt27
-rw-r--r--library/cpp/robots_txt/CMakeLists.txt17
-rw-r--r--library/cpp/robots_txt/CMakeLists.windows-x86_64.txt26
-rw-r--r--library/cpp/robots_txt/constants.h9
-rw-r--r--library/cpp/robots_txt/prefix_tree.cpp172
-rw-r--r--library/cpp/robots_txt/prefix_tree.h47
-rw-r--r--library/cpp/robots_txt/prefix_tree_rules_handler.cpp706
-rw-r--r--library/cpp/robots_txt/robots_txt.h605
-rw-r--r--library/cpp/robots_txt/robots_txt_parser.cpp116
-rw-r--r--library/cpp/robots_txt/robots_txt_parser.h38
-rw-r--r--library/cpp/robots_txt/robotstxtcfg.h3
-rw-r--r--library/cpp/robots_txt/robotstxtcfg/CMakeLists.darwin-x86_64.txt20
-rw-r--r--library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-aarch64.txt21
-rw-r--r--library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-x86_64.txt21
-rw-r--r--library/cpp/robots_txt/robotstxtcfg/CMakeLists.txt17
-rw-r--r--library/cpp/robots_txt/robotstxtcfg/CMakeLists.windows-x86_64.txt20
-rw-r--r--library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp2
-rw-r--r--library/cpp/robots_txt/robotstxtcfg/bot_id_set.h132
-rw-r--r--library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp2
-rw-r--r--library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.h11
-rw-r--r--library/cpp/robots_txt/robotstxtcfg/user_agents.cpp2
-rw-r--r--library/cpp/robots_txt/robotstxtcfg/user_agents.h303
-rw-r--r--library/cpp/robots_txt/robotstxtcfg/ya.make13
-rw-r--r--library/cpp/robots_txt/rules_handler.cpp514
-rw-r--r--library/cpp/robots_txt/ya.make18
-rw-r--r--library/cpp/yconf/CMakeLists.darwin-x86_64.txt19
-rw-r--r--library/cpp/yconf/CMakeLists.linux-aarch64.txt20
-rw-r--r--library/cpp/yconf/CMakeLists.linux-x86_64.txt20
-rw-r--r--library/cpp/yconf/CMakeLists.txt17
-rw-r--r--library/cpp/yconf/CMakeLists.windows-x86_64.txt19
-rw-r--r--mapreduce/CMakeLists.txt9
-rw-r--r--mapreduce/yt/CMakeLists.txt9
-rw-r--r--mapreduce/yt/interface/CMakeLists.txt9
-rw-r--r--mapreduce/yt/interface/protos/CMakeLists.darwin-x86_64.txt57
-rw-r--r--mapreduce/yt/interface/protos/CMakeLists.linux-aarch64.txt58
-rw-r--r--mapreduce/yt/interface/protos/CMakeLists.linux-x86_64.txt58
-rw-r--r--mapreduce/yt/interface/protos/CMakeLists.txt17
-rw-r--r--mapreduce/yt/interface/protos/CMakeLists.windows-x86_64.txt57
-rw-r--r--ydb/library/yql/public/CMakeLists.txt1
-rw-r--r--ydb/library/yql/public/purecalc/CMakeLists.darwin-x86_64.txt27
-rw-r--r--ydb/library/yql/public/purecalc/CMakeLists.linux-aarch64.txt28
-rw-r--r--ydb/library/yql/public/purecalc/CMakeLists.linux-x86_64.txt28
-rw-r--r--ydb/library/yql/public/purecalc/CMakeLists.txt17
-rw-r--r--ydb/library/yql/public/purecalc/CMakeLists.windows-x86_64.txt27
-rw-r--r--ydb/library/yql/public/purecalc/common/CMakeLists.darwin-x86_64.txt65
-rw-r--r--ydb/library/yql/public/purecalc/common/CMakeLists.linux-aarch64.txt66
-rw-r--r--ydb/library/yql/public/purecalc/common/CMakeLists.linux-x86_64.txt66
-rw-r--r--ydb/library/yql/public/purecalc/common/CMakeLists.txt17
-rw-r--r--ydb/library/yql/public/purecalc/common/CMakeLists.windows-x86_64.txt65
-rw-r--r--ydb/library/yql/public/purecalc/common/compile_mkql.cpp115
-rw-r--r--ydb/library/yql/public/purecalc/common/compile_mkql.h17
-rw-r--r--ydb/library/yql/public/purecalc/common/fwd.cpp1
-rw-r--r--ydb/library/yql/public/purecalc/common/fwd.h56
-rw-r--r--ydb/library/yql/public/purecalc/common/inspect_input.cpp33
-rw-r--r--ydb/library/yql/public/purecalc/common/inspect_input.h7
-rw-r--r--ydb/library/yql/public/purecalc/common/interface.cpp116
-rw-r--r--ydb/library/yql/public/purecalc/common/interface.h1137
-rw-r--r--ydb/library/yql/public/purecalc/common/logger_init.cpp32
-rw-r--r--ydb/library/yql/public/purecalc/common/logger_init.h10
-rw-r--r--ydb/library/yql/public/purecalc/common/names.cpp16
-rw-r--r--ydb/library/yql/public/purecalc/common/names.h16
-rw-r--r--ydb/library/yql/public/purecalc/common/processor_mode.cpp1
-rw-r--r--ydb/library/yql/public/purecalc/common/processor_mode.h11
-rw-r--r--ydb/library/yql/public/purecalc/common/program_factory.cpp144
-rw-r--r--ydb/library/yql/public/purecalc/common/program_factory.h46
-rw-r--r--ydb/library/yql/public/purecalc/common/transformations/align_output_schema.cpp93
-rw-r--r--ydb/library/yql/public/purecalc/common/transformations/align_output_schema.h21
-rw-r--r--ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.cpp96
-rw-r--r--ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.h29
-rw-r--r--ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.cpp100
-rw-r--r--ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.h18
-rw-r--r--ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.cpp216
-rw-r--r--ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.h28
-rw-r--r--ydb/library/yql/public/purecalc/common/transformations/type_annotation.cpp228
-rw-r--r--ydb/library/yql/public/purecalc/common/transformations/type_annotation.h28
-rw-r--r--ydb/library/yql/public/purecalc/common/type_from_schema.cpp255
-rw-r--r--ydb/library/yql/public/purecalc/common/type_from_schema.h36
-rw-r--r--ydb/library/yql/public/purecalc/common/worker.cpp566
-rw-r--r--ydb/library/yql/public/purecalc/common/worker.h168
-rw-r--r--ydb/library/yql/public/purecalc/common/worker_factory.cpp454
-rw-r--r--ydb/library/yql/public/purecalc/common/worker_factory.h157
-rw-r--r--ydb/library/yql/public/purecalc/common/wrappers.cpp1
-rw-r--r--ydb/library/yql/public/purecalc/common/wrappers.h70
-rw-r--r--ydb/library/yql/public/purecalc/common/ya.make47
-rw-r--r--ydb/library/yql/public/purecalc/examples/CMakeLists.txt11
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.darwin-x86_64.txt64
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.linux-aarch64.txt67
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.linux-x86_64.txt69
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.txt17
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.windows-x86_64.txt57
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf/main.cpp133
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf/main.proto11
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf/ut/canondata/exectest.run_protobuf_/log.out18
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf/ut/canondata/result.json5
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf/ut/ya.make11
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf/ya.make27
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.darwin-x86_64.txt64
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.linux-aarch64.txt67
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.linux-x86_64.txt69
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.txt17
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.windows-x86_64.txt57
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.cpp75
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.proto10
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/canondata/exectest.run_protobuf_pull_list_/log.out6
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/canondata/result.json6
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/ya.make9
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ya.make22
-rw-r--r--ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.darwin-x86_64.txt34
-rw-r--r--ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.linux-aarch64.txt37
-rw-r--r--ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.linux-x86_64.txt39
-rw-r--r--ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.txt17
-rw-r--r--ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.windows-x86_64.txt27
-rw-r--r--ydb/library/yql/public/purecalc/examples/skiff_pull_list/main.cpp92
-rw-r--r--ydb/library/yql/public/purecalc/examples/skiff_pull_list/ya.make14
-rw-r--r--ydb/library/yql/public/purecalc/examples/ya.make7
-rw-r--r--ydb/library/yql/public/purecalc/helpers/CMakeLists.darwin-x86_64.txt18
-rw-r--r--ydb/library/yql/public/purecalc/helpers/CMakeLists.linux-aarch64.txt19
-rw-r--r--ydb/library/yql/public/purecalc/helpers/CMakeLists.linux-x86_64.txt19
-rw-r--r--ydb/library/yql/public/purecalc/helpers/CMakeLists.txt17
-rw-r--r--ydb/library/yql/public/purecalc/helpers/CMakeLists.windows-x86_64.txt18
-rw-r--r--ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.darwin-x86_64.txt20
-rw-r--r--ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.linux-aarch64.txt21
-rw-r--r--ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.linux-x86_64.txt21
-rw-r--r--ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.txt17
-rw-r--r--ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.windows-x86_64.txt20
-rw-r--r--ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.cpp202
-rw-r--r--ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.h60
-rw-r--r--ydb/library/yql/public/purecalc/helpers/protobuf/ya.make14
-rw-r--r--ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.darwin-x86_64.txt21
-rw-r--r--ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.linux-aarch64.txt22
-rw-r--r--ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.linux-x86_64.txt22
-rw-r--r--ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.txt17
-rw-r--r--ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.windows-x86_64.txt21
-rw-r--r--ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.cpp1
-rw-r--r--ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.h40
-rw-r--r--ydb/library/yql/public/purecalc/helpers/stream/ya.make13
-rw-r--r--ydb/library/yql/public/purecalc/helpers/ya.make8
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/CMakeLists.txt11
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.darwin-x86_64.txt27
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.linux-aarch64.txt28
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.linux-x86_64.txt28
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.txt17
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.windows-x86_64.txt27
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/mkql/spec.cpp934
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/mkql/spec.h231
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.darwin-x86_64.txt77
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.linux-aarch64.txt80
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.linux-x86_64.txt82
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.txt17
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.windows-x86_64.txt70
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/mkql/ut/test.inl777
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/mkql/ut/test_spec.cpp325
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/mkql/ut/ya.make20
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/mkql/ya.make25
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.darwin-x86_64.txt24
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.linux-aarch64.txt25
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.linux-x86_64.txt25
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.txt17
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.windows-x86_64.txt24
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.cpp1
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.h80
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf/spec.cpp1
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h147
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.darwin-x86_64.txt71
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.linux-aarch64.txt74
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.linux-x86_64.txt76
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.txt17
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.windows-x86_64.txt64
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf/ut/test_spec.cpp995
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf/ut/ya.make19
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf/ya.make21
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.darwin-x86_64.txt23
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.linux-aarch64.txt24
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.linux-x86_64.txt24
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.txt17
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.windows-x86_64.txt23
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.cpp1
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.h31
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.cpp1064
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.h257
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf_raw/ya.make16
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/ut/ya.make4
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/ya.make9
-rw-r--r--ydb/library/yql/public/purecalc/purecalc.cpp1
-rw-r--r--ydb/library/yql/public/purecalc/purecalc.h3
-rw-r--r--ydb/library/yql/public/purecalc/ut/CMakeLists.darwin-x86_64.txt78
-rw-r--r--ydb/library/yql/public/purecalc/ut/CMakeLists.linux-aarch64.txt81
-rw-r--r--ydb/library/yql/public/purecalc/ut/CMakeLists.linux-x86_64.txt83
-rw-r--r--ydb/library/yql/public/purecalc/ut/CMakeLists.txt17
-rw-r--r--ydb/library/yql/public/purecalc/ut/CMakeLists.windows-x86_64.txt71
-rw-r--r--ydb/library/yql/public/purecalc/ut/empty_stream.h20
-rw-r--r--ydb/library/yql/public/purecalc/ut/fake_spec.cpp36
-rw-r--r--ydb/library/yql/public/purecalc/ut/fake_spec.h54
-rw-r--r--ydb/library/yql/public/purecalc/ut/lib/CMakeLists.darwin-x86_64.txt19
-rw-r--r--ydb/library/yql/public/purecalc/ut/lib/CMakeLists.linux-aarch64.txt20
-rw-r--r--ydb/library/yql/public/purecalc/ut/lib/CMakeLists.linux-x86_64.txt20
-rw-r--r--ydb/library/yql/public/purecalc/ut/lib/CMakeLists.txt17
-rw-r--r--ydb/library/yql/public/purecalc/ut/lib/CMakeLists.windows-x86_64.txt19
-rw-r--r--ydb/library/yql/public/purecalc/ut/lib/helpers.cpp55
-rw-r--r--ydb/library/yql/public/purecalc/ut/lib/helpers.h18
-rw-r--r--ydb/library/yql/public/purecalc/ut/lib/ya.make13
-rw-r--r--ydb/library/yql/public/purecalc/ut/protos/CMakeLists.darwin-x86_64.txt43
-rw-r--r--ydb/library/yql/public/purecalc/ut/protos/CMakeLists.linux-aarch64.txt44
-rw-r--r--ydb/library/yql/public/purecalc/ut/protos/CMakeLists.linux-x86_64.txt44
-rw-r--r--ydb/library/yql/public/purecalc/ut/protos/CMakeLists.txt17
-rw-r--r--ydb/library/yql/public/purecalc/ut/protos/CMakeLists.windows-x86_64.txt43
-rw-r--r--ydb/library/yql/public/purecalc/ut/protos/test_structs.proto122
-rw-r--r--ydb/library/yql/public/purecalc/ut/protos/ya.make9
-rw-r--r--ydb/library/yql/public/purecalc/ut/test_eval.cpp30
-rw-r--r--ydb/library/yql/public/purecalc/ut/test_pool.cpp184
-rw-r--r--ydb/library/yql/public/purecalc/ut/test_schema.cpp1
-rw-r--r--ydb/library/yql/public/purecalc/ut/test_sexpr.cpp55
-rw-r--r--ydb/library/yql/public/purecalc/ut/test_sql.cpp205
-rw-r--r--ydb/library/yql/public/purecalc/ut/test_udf.cpp195
-rw-r--r--ydb/library/yql/public/purecalc/ut/test_user_data.cpp62
-rw-r--r--ydb/library/yql/public/purecalc/ut/ya.make26
-rw-r--r--ydb/library/yql/public/purecalc/ya.make19
-rw-r--r--ydb/library/yql/public/ya.make4
-rw-r--r--yql/CMakeLists.txt9
-rw-r--r--yql/udfs/CMakeLists.txt9
-rw-r--r--yql/udfs/common/CMakeLists.txt10
-rw-r--r--yql/udfs/common/ip/CMakeLists.darwin-x86_64.txt36
-rw-r--r--yql/udfs/common/ip/CMakeLists.linux-aarch64.txt38
-rw-r--r--yql/udfs/common/ip/CMakeLists.linux-x86_64.txt38
-rw-r--r--yql/udfs/common/ip/CMakeLists.txt17
-rw-r--r--yql/udfs/common/ip/CMakeLists.windows-x86_64.txt36
-rw-r--r--yql/udfs/common/url/CMakeLists.darwin-x86_64.txt42
-rw-r--r--yql/udfs/common/url/CMakeLists.linux-aarch64.txt44
-rw-r--r--yql/udfs/common/url/CMakeLists.linux-x86_64.txt44
-rw-r--r--yql/udfs/common/url/CMakeLists.txt17
-rw-r--r--yql/udfs/common/url/CMakeLists.windows-x86_64.txt42
-rw-r--r--yweb/CMakeLists.txt13
-rw-r--r--yweb/config/CMakeLists.darwin-x86_64.txt44
-rw-r--r--yweb/config/CMakeLists.linux-aarch64.txt45
-rw-r--r--yweb/config/CMakeLists.linux-x86_64.txt45
-rw-r--r--yweb/config/CMakeLists.txt17
-rw-r--r--yweb/config/CMakeLists.windows-x86_64.txt44
-rw-r--r--yweb/protos/CMakeLists.darwin-x86_64.txt387
-rw-r--r--yweb/protos/CMakeLists.linux-aarch64.txt388
-rw-r--r--yweb/protos/CMakeLists.linux-x86_64.txt388
-rw-r--r--yweb/protos/CMakeLists.txt17
-rw-r--r--yweb/protos/CMakeLists.windows-x86_64.txt387
-rw-r--r--yweb/protos/robotzones/CMakeLists.darwin-x86_64.txt56
-rw-r--r--yweb/protos/robotzones/CMakeLists.linux-aarch64.txt57
-rw-r--r--yweb/protos/robotzones/CMakeLists.linux-x86_64.txt57
-rw-r--r--yweb/protos/robotzones/CMakeLists.txt17
-rw-r--r--yweb/protos/robotzones/CMakeLists.windows-x86_64.txt56
-rw-r--r--yweb/realtime/CMakeLists.txt9
-rw-r--r--yweb/realtime/protos/CMakeLists.darwin-x86_64.txt56
-rw-r--r--yweb/realtime/protos/CMakeLists.linux-aarch64.txt57
-rw-r--r--yweb/realtime/protos/CMakeLists.linux-x86_64.txt57
-rw-r--r--yweb/realtime/protos/CMakeLists.txt17
-rw-r--r--yweb/realtime/protos/CMakeLists.windows-x86_64.txt56
-rw-r--r--yweb/robot/CMakeLists.txt10
-rw-r--r--yweb/robot/dbscheeme/CMakeLists.darwin-x86_64.txt32
-rw-r--r--yweb/robot/dbscheeme/CMakeLists.linux-aarch64.txt33
-rw-r--r--yweb/robot/dbscheeme/CMakeLists.linux-x86_64.txt33
-rw-r--r--yweb/robot/dbscheeme/CMakeLists.txt17
-rw-r--r--yweb/robot/dbscheeme/CMakeLists.windows-x86_64.txt32
-rw-r--r--yweb/robot/kiwi_queries/CMakeLists.txt9
-rw-r--r--yweb/robot/kiwi_queries/others/CMakeLists.txt9
-rw-r--r--yweb/robot/kiwi_queries/others/lib/CMakeLists.txt9
-rw-r--r--yweb/robot/kiwi_queries/others/lib/indexannportion/CMakeLists.txt9
-rw-r--r--yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.darwin-x86_64.txt43
-rw-r--r--yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.linux-aarch64.txt44
-rw-r--r--yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.linux-x86_64.txt44
-rw-r--r--yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.txt17
-rw-r--r--yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.windows-x86_64.txt43
-rw-r--r--yweb/urlfilter/CMakeLists.darwin-x86_64.txt27
-rw-r--r--yweb/urlfilter/CMakeLists.linux-aarch64.txt28
-rw-r--r--yweb/urlfilter/CMakeLists.linux-x86_64.txt28
-rw-r--r--yweb/urlfilter/CMakeLists.txt17
-rw-r--r--yweb/urlfilter/CMakeLists.windows-x86_64.txt27
-rw-r--r--zora/CMakeLists.txt9
-rw-r--r--zora/zora/CMakeLists.txt9
-rw-r--r--zora/zora/proto/CMakeLists.txt9
-rw-r--r--zora/zora/proto/common/CMakeLists.darwin-x86_64.txt43
-rw-r--r--zora/zora/proto/common/CMakeLists.linux-aarch64.txt44
-rw-r--r--zora/zora/proto/common/CMakeLists.linux-x86_64.txt44
-rw-r--r--zora/zora/proto/common/CMakeLists.txt17
-rw-r--r--zora/zora/proto/common/CMakeLists.windows-x86_64.txt43
698 files changed, 82714 insertions, 0 deletions
diff --git a/CMakeLists.darwin-x86_64.txt b/CMakeLists.darwin-x86_64.txt
index 075d57df50..a68b571b8b 100644
--- a/CMakeLists.darwin-x86_64.txt
+++ b/CMakeLists.darwin-x86_64.txt
@@ -13,3 +13,9 @@ add_subdirectory(util)
add_subdirectory(yt)
add_subdirectory(certs)
add_subdirectory(ydb)
+add_subdirectory(yql)
+add_subdirectory(kernel)
+add_subdirectory(yweb)
+add_subdirectory(mapreduce)
+add_subdirectory(zora)
+add_subdirectory(geobase)
diff --git a/CMakeLists.linux-aarch64.txt b/CMakeLists.linux-aarch64.txt
index 075d57df50..a68b571b8b 100644
--- a/CMakeLists.linux-aarch64.txt
+++ b/CMakeLists.linux-aarch64.txt
@@ -13,3 +13,9 @@ add_subdirectory(util)
add_subdirectory(yt)
add_subdirectory(certs)
add_subdirectory(ydb)
+add_subdirectory(yql)
+add_subdirectory(kernel)
+add_subdirectory(yweb)
+add_subdirectory(mapreduce)
+add_subdirectory(zora)
+add_subdirectory(geobase)
diff --git a/CMakeLists.linux-x86_64.txt b/CMakeLists.linux-x86_64.txt
index 075d57df50..a68b571b8b 100644
--- a/CMakeLists.linux-x86_64.txt
+++ b/CMakeLists.linux-x86_64.txt
@@ -13,3 +13,9 @@ add_subdirectory(util)
add_subdirectory(yt)
add_subdirectory(certs)
add_subdirectory(ydb)
+add_subdirectory(yql)
+add_subdirectory(kernel)
+add_subdirectory(yweb)
+add_subdirectory(mapreduce)
+add_subdirectory(zora)
+add_subdirectory(geobase)
diff --git a/CMakeLists.windows-x86_64.txt b/CMakeLists.windows-x86_64.txt
index e8667d4e27..b4dc0d7487 100644
--- a/CMakeLists.windows-x86_64.txt
+++ b/CMakeLists.windows-x86_64.txt
@@ -13,3 +13,9 @@ add_subdirectory(library)
add_subdirectory(yt)
add_subdirectory(certs)
add_subdirectory(ydb)
+add_subdirectory(yql)
+add_subdirectory(kernel)
+add_subdirectory(yweb)
+add_subdirectory(mapreduce)
+add_subdirectory(zora)
+add_subdirectory(geobase)
diff --git a/contrib/libs/libc_compat/include/link/link.h b/contrib/libs/libc_compat/include/link/link.h
new file mode 100644
index 0000000000..7352c26166
--- /dev/null
+++ b/contrib/libs/libc_compat/include/link/link.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#ifdef _MSC_VER
+/* Declares link() for MSVC builds only; C linkage when included from C++. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int link(const char *oldpath, const char *newpath);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/contrib/tools/ragel5/common/buffer.h b/contrib/tools/ragel5/common/buffer.h
new file mode 100644
index 0000000000..99c4e82d49
--- /dev/null
+++ b/contrib/tools/ragel5/common/buffer.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2003 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _BUFFER_H
+#define _BUFFER_H
+
+#define BUFFER_INITIAL_SIZE 4096
+
+/* An automatically grown buffer for collecting tokens. Always reuses space;
+ * never down resizes. */
+struct Buffer
+{
+ Buffer()
+ {
+ data = (char*) malloc( BUFFER_INITIAL_SIZE );
+ allocated = BUFFER_INITIAL_SIZE;
+ length = 0;
+ }
+ ~Buffer() { free(data); }
+
+ void append( char p )
+ {
+ if ( length == allocated ) {
+ allocated *= 2;
+ data = (char*) realloc( data, allocated );
+ }
+ data[length++] = p;
+ }
+
+ void clear() { length = 0; }
+
+ char *data;
+ int allocated;
+ int length;
+};
+
+#endif /* _BUFFER_H */
diff --git a/contrib/tools/ragel5/common/common.cpp b/contrib/tools/ragel5/common/common.cpp
new file mode 100644
index 0000000000..4484dcbd73
--- /dev/null
+++ b/contrib/tools/ragel5/common/common.cpp
@@ -0,0 +1,296 @@
+/*
+ * Copyright 2006-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "pcheck.h"
+#include "common.h"
+#include <string.h>
+#include <assert.h>
+
+#ifdef _WIN32
+#include <malloc.h>
+#else
+#include <alloca.h>
+#endif
+
+HostType hostTypesC[] =
+{
+ { "char", 0, true, CHAR_MIN, CHAR_MAX, sizeof(char) },
+ { "unsigned", "char", false, 0, UCHAR_MAX, sizeof(unsigned char) },
+ { "short", 0, true, SHRT_MIN, SHRT_MAX, sizeof(short) },
+ { "unsigned", "short", false, 0, USHRT_MAX, sizeof(unsigned short) },
+ { "int", 0, true, INT_MIN, INT_MAX, sizeof(int) },
+ { "unsigned", "int", false, 0, UINT_MAX, sizeof(unsigned int) },
+ { "long", 0, true, LONG_MIN, LONG_MAX, sizeof(long) },
+ { "unsigned", "long", false, 0, (long long)ULONG_MAX, sizeof(unsigned long) }
+};
+
+HostType hostTypesD[] =
+{
+ { "byte", 0, true, CHAR_MIN, CHAR_MAX, 1 },
+ { "ubyte", 0, false, 0, UCHAR_MAX, 1 },
+ { "char", 0, false, 0, UCHAR_MAX, 1 },
+ { "short", 0, true, SHRT_MIN, SHRT_MAX, 2 },
+ { "ushort", 0, false, 0, USHRT_MAX, 2 },
+ { "wchar", 0, false, 0, USHRT_MAX, 2 },
+ { "int", 0, true, INT_MIN, INT_MAX, 4 },
+ { "uint", 0, false, 0, UINT_MAX, 4 },
+ { "dchar", 0, false, 0, UINT_MAX, 4 }
+};
+
+HostType hostTypesJava[] =
+{
+ { "byte", 0, true, CHAR_MIN, CHAR_MAX, 1 },
+ { "short", 0, true, SHRT_MIN, SHRT_MAX, 2 },
+ { "char", 0, false, 0, USHRT_MAX, 2 },
+ { "int", 0, true, INT_MIN, INT_MAX, 4 },
+};
+
+HostType hostTypesRuby[] =
+{
+ { "byte", 0, true, CHAR_MIN, CHAR_MAX, 1 },
+ { "short", 0, true, SHRT_MIN, SHRT_MAX, 2 },
+ { "char", 0, false, 0, USHRT_MAX, 2 },
+ { "int", 0, true, INT_MIN, INT_MAX, 4 },
+};
+
+HostLang hostLangC = { hostTypesC, 8, hostTypesC+0, true };
+HostLang hostLangD = { hostTypesD, 9, hostTypesD+2, true };
+HostLang hostLangJava = { hostTypesJava, 4, hostTypesJava+2, false };
+HostLang hostLangRuby = { hostTypesRuby, 4, hostTypesRuby+2, false };
+
+HostLang *hostLang = &hostLangC;
+HostLangType hostLangType = CCode;
+
+/* Construct a new parameter checker for paramSpec. */
+ParamCheck::ParamCheck(const char *paramSpec, int argc, char **argv)
+:
+ state(noparam),
+ argOffset(0),
+ curArg(0),
+ iCurArg(1),
+ paramSpec(paramSpec),
+ argc(argc),
+ argv(argv)
+{
+}
+
+/* Check a single option. Returns false once the arg list is exhausted, true
+ * otherwise. Sets parameter to the option character (0 for a non-option arg),
+ * parameterArg to its argument if there is one (0 otherwise), and state. */
+bool ParamCheck::check()
+{
+ bool requiresParam;
+
+ if ( iCurArg >= argc ) { /* Off the end of the arg list. */
+ state = noparam;
+ return false;
+ }
+
+ if ( argOffset != 0 && *argOffset == 0 ) {
+ /* We are at the end of an arg string. */
+ iCurArg += 1;
+ if ( iCurArg >= argc ) {
+ state = noparam;
+ return false;
+ }
+ argOffset = 0;
+ }
+
+ if ( argOffset == 0 ) {
+ /* Set the current arg. */
+ curArg = argv[iCurArg];
+
+ /* We are at the beginning of an arg string. */
+ if ( argv[iCurArg] == 0 || /* Argv[iCurArg] is null. */
+ argv[iCurArg][0] != '-' || /* Not a param. */
+ argv[iCurArg][1] == 0 ) { /* Only a dash. */
+ parameter = 0;
+ parameterArg = 0;
+
+ iCurArg += 1;
+ state = noparam;
+ return true;
+ }
+ argOffset = argv[iCurArg] + 1;
+ }
+
+ /* Get the arg char. */
+ char argChar = *argOffset;
+
+ /* Loop over all the parms and look for a match. */
+ const char *pSpec = paramSpec;
+ while ( *pSpec != 0 ) {
+ char pSpecChar = *pSpec;
+
+ /* If there is a ':' following the char then
+ * it requires a parm. If a parm is required
+ * then move ahead two in the parmspec. Otherwise
+ * move ahead one in the parm spec. */
+ if ( pSpec[1] == ':' ) {
+ requiresParam = true;
+ pSpec += 2;
+ }
+ else {
+ requiresParam = false;
+ pSpec += 1;
+ }
+
+ /* Do we have a match. */
+ if ( argChar == pSpecChar ) {
+ if ( requiresParam ) {
+ if ( argOffset[1] == 0 ) {
+ /* The param must follow. */
+ if ( iCurArg + 1 == argc ) {
+ /* We are the last arg so there
+ * cannot be a parameter to it. */
+ parameter = argChar;
+ parameterArg = 0;
+ iCurArg += 1;
+ argOffset = 0;
+ state = invalid;
+ return true;
+ }
+ else {
+ /* the parameter to the arg is the next arg. */
+ parameter = pSpecChar;
+ parameterArg = argv[iCurArg + 1];
+ iCurArg += 2;
+ argOffset = 0;
+ state = match;
+ return true;
+ }
+ }
+ else {
+ /* The param for the arg is built in. */
+ parameter = pSpecChar;
+ parameterArg = argOffset + 1;
+ iCurArg += 1;
+ argOffset = 0;
+ state = match;
+ return true;
+ }
+ }
+ else {
+ /* Good, we matched the parm and no
+ * arg is required. */
+ parameter = pSpecChar;
+ parameterArg = 0;
+ argOffset += 1;
+ state = match;
+ return true;
+ }
+ }
+ }
+
+ /* We did not find a match. Bad Argument. */
+ parameter = argChar;
+ parameterArg = 0;
+ argOffset += 1;
+ state = invalid;
+ return true;
+}
+
+void NormalizeWinPath(char* input) {
+    /* Rewrites input in place: each '\' becomes '/', and a doubled backslash becomes a
+     * single '/' unless the pair ends the string. The write index never passes the read index. */
+    const size_t len = strlen(input);
+    for (size_t i = 0, j = 0; i <= len; ++i, ++j) {
+        if (input[i] == '\\') {
+            input[j] = '/';
+            if (i + 2 < len && input[i + 1] == '\\')
+                ++i;
+        } else {
+            input[j] = input[i];
+        }
+    }
+}
+
+/* Counts newlines before sending sync. */
+int output_filter::sync( )
+{
+ line += 1;
+ return std::filebuf::sync();
+}
+
+/* Counts newlines before sending data out to file. */
+std::streamsize output_filter::xsputn( const char *s, std::streamsize n )
+{
+    for ( std::streamsize i = 0; i < n; i++ ) { /* Index type matches n. */
+        if ( s[i] == '\n' )
+            line += 1;
+    }
+    return std::filebuf::xsputn( s, n );
+}
+
+/* Scans a string looking for the file extension. If there is a file
+ * extension then pointer returned points to inside the string
+ * passed in. Otherwise (including for an empty string) returns null. */
+char *findFileExtension( char *stemFile )
+{
+    if ( *stemFile == 0 ) return 0; /* Empty: stemFile - 1 would be out of bounds. */
+    char *ppos = stemFile + strlen(stemFile) - 1;
+    /* Scan backwards from the end looking for the first dot.
+     * If we encounter a '/' before the first dot, then stop the scan. */
+    while ( 1 ) {
+        /* If we found a dot or got to the beginning of the string then
+         * we are done. */
+        if ( ppos == stemFile || *ppos == '.' )
+            break;
+
+        /* If we hit a / then there is no extension. Done. */
+        if ( *ppos == '/' ) {
+            ppos = stemFile;
+            break;
+        }
+        ppos--;
+    }
+
+    /* If we got to the front of the string then bail we
+     * did not find an extension */
+    if ( ppos == stemFile )
+        ppos = 0;
+
+    return ppos;
+}
+
+/* Make a file name from a stem. Removes the old filename suffix and
+ * replaces it with a new one. Returns a newed up string. */
+char *fileNameFromStem( char *stemFile, const char *suffix )
+{
+ int len = strlen( stemFile );
+ assert( len > 0 );
+
+ /* Get the extension. */
+ char *ppos = findFileExtension( stemFile );
+
+ /* If an extension was found, then shorten what we think the len is. */
+ if ( ppos != 0 )
+ len = ppos - stemFile;
+
+ /* Make the return string from the stem and the suffix. */
+ char *retVal = new char[ len + strlen( suffix ) + 1 ];
+ strncpy( retVal, stemFile, len );
+ strcpy( retVal + len, suffix );
+
+ return retVal;
+}
+
+
diff --git a/contrib/tools/ragel5/common/common.h b/contrib/tools/ragel5/common/common.h
new file mode 100644
index 0000000000..aae6f85add
--- /dev/null
+++ b/contrib/tools/ragel5/common/common.h
@@ -0,0 +1,308 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _COMMON_H
+#define _COMMON_H
+
+#include <fstream>
+#include <climits>
+#include "dlist.h"
+
+typedef unsigned long long Size;
+
+struct Key
+{
+private:
+ long key;
+
+public:
+ friend inline Key operator+(const Key key1, const Key key2);
+ friend inline Key operator-(const Key key1, const Key key2);
+ friend inline Key operator/(const Key key1, const Key key2);
+ friend inline long operator&(const Key key1, const Key key2);
+
+ friend inline bool operator<( const Key key1, const Key key2 );
+ friend inline bool operator<=( const Key key1, const Key key2 );
+ friend inline bool operator>( const Key key1, const Key key2 );
+ friend inline bool operator>=( const Key key1, const Key key2 );
+ friend inline bool operator==( const Key key1, const Key key2 );
+ friend inline bool operator!=( const Key key1, const Key key2 );
+
+ friend struct KeyOps;
+
+ Key( ) {}
+ Key( const Key &key ) : key(key.key) {}
+ Key( long key ) : key(key) {}
+
+ /* Returns the value used to represent the key. This value must be
+ * interpreted based on signedness. */
+ long getVal() const { return key; };
+
+ /* Returns the key cast to a long long. This form of the key does not
+ * require any signedness interpretation. */
+ long long getLongLong() const;
+
+ bool isUpper() const { return ( 'A' <= key && key <= 'Z' ); }
+ bool isLower() const { return ( 'a' <= key && key <= 'z' ); }
+ bool isPrintable() const
+ {
+ return ( 7 <= key && key <= 13 ) || ( 32 <= key && key < 127 );
+ }
+
+ Key toUpper() const
+ { return Key( 'A' + ( key - 'a' ) ); }
+ Key toLower() const
+ { return Key( 'a' + ( key - 'A' ) ); }
+
+ void operator+=( const Key other )
+ {
+ /* FIXME: must be made aware of isSigned. */
+ key += other.key;
+ }
+
+ void operator-=( const Key other )
+ {
+ /* FIXME: must be made aware of isSigned. */
+ key -= other.key;
+ }
+
+ void operator|=( const Key other )
+ {
+ /* FIXME: must be made aware of isSigned. */
+ key |= other.key;
+ }
+
+ /* Decrement. Needed only for ranges. */
+ inline void decrement();
+ inline void increment();
+};
+
+struct HostType
+{
+ const char *data1;
+ const char *data2;
+ bool isSigned;
+ long long minVal;
+ long long maxVal;
+ unsigned int size;
+};
+
+struct HostLang
+{
+ HostType *hostTypes;
+ int numHostTypes;
+ HostType *defaultAlphType;
+ bool explicitUnsigned;
+};
+
+
+/* Target language. */
+enum HostLangType
+{
+ CCode,
+ DCode,
+ JavaCode,
+ RubyCode
+};
+
+extern HostLang *hostLang;
+extern HostLangType hostLangType;
+
+extern HostLang hostLangC;
+extern HostLang hostLangD;
+extern HostLang hostLangJava;
+extern HostLang hostLangRuby;
+
+/* An abstraction of the key operators that manages key operations such as
+ * comparison and increment according to the signedness of the key. */
+struct KeyOps
+{
+ /* Default to signed alphabet. */
+ KeyOps() :
+ isSigned(true),
+ alphType(0)
+ {}
+
+    /* Explicit signedness; no alphabet type is selected until setAlphType(). */
+    KeyOps( bool isSigned )
+        :isSigned(isSigned), alphType(0) {}
+
+ bool isSigned;
+ Key minKey, maxKey;
+ HostType *alphType;
+
+ void setAlphType( HostType *alphType )
+ {
+ this->alphType = alphType;
+ isSigned = alphType->isSigned;
+ if ( isSigned ) {
+ minKey = (long) alphType->minVal;
+ maxKey = (long) alphType->maxVal;
+ }
+ else {
+ minKey = (long) (unsigned long) alphType->minVal;
+ maxKey = (long) (unsigned long) alphType->maxVal;
+ }
+ }
+
+ /* Compute the distance between two keys. */
+ Size span( Key key1, Key key2 )
+ {
+ return isSigned ?
+ (unsigned long long)(
+ (long long)key2.key -
+ (long long)key1.key + 1) :
+ (unsigned long long)(
+ (unsigned long)key2.key) -
+ (unsigned long long)((unsigned long)key1.key) + 1;
+ }
+
+ Size alphSize()
+ { return span( minKey, maxKey ); }
+
+ HostType *typeSubsumes( long long maxVal )
+ {
+ for ( int i = 0; i < hostLang->numHostTypes; i++ ) {
+ if ( maxVal <= hostLang->hostTypes[i].maxVal )
+ return hostLang->hostTypes + i;
+ }
+ return 0;
+ }
+
+ HostType *typeSubsumes( bool isSigned, long long maxVal )
+ {
+ for ( int i = 0; i < hostLang->numHostTypes; i++ ) {
+ if ( ( isSigned == hostLang->hostTypes[i].isSigned ) &&
+ maxVal <= hostLang->hostTypes[i].maxVal )
+ return hostLang->hostTypes + i;
+ }
+ return 0;
+ }
+};
+
+extern KeyOps *keyOps;
+
+inline bool operator<( const Key key1, const Key key2 )
+{
+ return keyOps->isSigned ? key1.key < key2.key :
+ (unsigned long)key1.key < (unsigned long)key2.key;
+}
+
+inline bool operator<=( const Key key1, const Key key2 )
+{
+ return keyOps->isSigned ? key1.key <= key2.key :
+ (unsigned long)key1.key <= (unsigned long)key2.key;
+}
+
+inline bool operator>( const Key key1, const Key key2 )
+{
+ return keyOps->isSigned ? key1.key > key2.key :
+ (unsigned long)key1.key > (unsigned long)key2.key;
+}
+
+inline bool operator>=( const Key key1, const Key key2 )
+{
+ return keyOps->isSigned ? key1.key >= key2.key :
+ (unsigned long)key1.key >= (unsigned long)key2.key;
+}
+
+inline bool operator==( const Key key1, const Key key2 )
+{
+ return key1.key == key2.key;
+}
+
+inline bool operator!=( const Key key1, const Key key2 )
+{
+ return key1.key != key2.key;
+}
+
+/* Decrement. Needed only for ranges. */
+inline void Key::decrement()
+{
+ key = keyOps->isSigned ? key - 1 : ((unsigned long)key)-1;
+}
+
+/* Increment. Needed only for ranges. */
+inline void Key::increment()
+{
+ key = keyOps->isSigned ? key+1 : ((unsigned long)key)+1;
+}
+
+inline long long Key::getLongLong() const
+{
+ return keyOps->isSigned ? (long long)key : (long long)(unsigned long)key;
+}
+
+inline Key operator+(const Key key1, const Key key2)
+{
+ /* FIXME: must be made aware of isSigned. */
+ return Key( key1.key + key2.key );
+}
+
+inline Key operator-(const Key key1, const Key key2)
+{
+ /* FIXME: must be made aware of isSigned. */
+ return Key( key1.key - key2.key );
+}
+
+inline long operator&(const Key key1, const Key key2)
+{
+ /* FIXME: must be made aware of isSigned. */
+ return key1.key & key2.key;
+}
+
+inline Key operator/(const Key key1, const Key key2)
+{
+ /* FIXME: must be made aware of isSigned. */
+ return key1.key / key2.key;
+}
+
+/* Filter on the output stream that keeps track of the number of lines
+ * output. */
+class output_filter : public std::filebuf
+{
+public:
+ output_filter( char *fileName ) : fileName(fileName), line(1) { }
+
+ virtual int sync();
+ virtual std::streamsize xsputn(const char* s, std::streamsize n);
+
+ char *fileName;
+ int line;
+};
+
+char *findFileExtension( char *stemFile );
+char *fileNameFromStem( char *stemFile, const char *suffix );
+
+struct Export
+{
+ Export(const char *name, Key key )
+ : name(name), key(key) {}
+
+ const char *name;
+ Key key;
+
+ Export *prev, *next;
+};
+
+typedef DList<Export> ExportList;
+
+#endif /* _COMMON_H */
diff --git a/contrib/tools/ragel5/common/config.h b/contrib/tools/ragel5/common/config.h
new file mode 100644
index 0000000000..405cfd6c3b
--- /dev/null
+++ b/contrib/tools/ragel5/common/config.h
@@ -0,0 +1,39 @@
+/* common/config.h. Generated by configure. */
+/*
+ * Copyright 2001 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+
+/* Programs. */
+/* #undef GDC */
+#define GOBJC gcc -x objective-c
+#define CXX c++
+#define CC cc
+/* #undef JAVAC */
+/* #undef TXL */
+/* #undef RUBY */
+
+#ifdef WIN32
+#define strcasecmp _stricmp
+#endif
+
+#endif /* _CONFIG_H */
diff --git a/contrib/tools/ragel5/common/pcheck.h b/contrib/tools/ragel5/common/pcheck.h
new file mode 100644
index 0000000000..5f95dc3c12
--- /dev/null
+++ b/contrib/tools/ragel5/common/pcheck.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2001, 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _PCHECK_H
+#define _PCHECK_H
+
+/* Command line parameter checker. It is constructed from a parameter spec
+ * and the program's argc/argv. The public members expose the parameter that
+ * was matched, its argument and a match state; presumably check() advances
+ * through the arguments and refreshes them -- confirm against the
+ * implementation, which is not part of this header. */
+class ParamCheck
+{
+public:
+ ParamCheck(const char *paramSpec, int argc, char **argv);
+
+ bool check();
+
+ char *parameterArg; /* The argument to the parameter. */
+ char parameter; /* The parameter matched. */
+ enum { match, invalid, noparam } state;
+
+ char *argOffset; /* If we are reading params inside an
+ * arg this points to the offset. */
+
+ char *curArg; /* Pointer to the current arg. */
+ int iCurArg; /* Index to the current arg. */
+
+private:
+ const char *paramSpec; /* Parameter spec supplied by the coder. */
+ int argc; /* Argument data from the command line. */
+ char **argv;
+
+};
+
+void NormalizeWinPath(char* input);
+
+#endif /* _PCHECK_H */
diff --git a/contrib/tools/ragel5/common/version.h b/contrib/tools/ragel5/common/version.h
new file mode 100644
index 0000000000..dba4eb2154
--- /dev/null
+++ b/contrib/tools/ragel5/common/version.h
@@ -0,0 +1,2 @@
+/* Version string of this copy of Ragel. */
+#define VERSION "5.19"
+/* Presumably the publication date of this release -- confirm. */
+#define PUBDATE "March 2007"
diff --git a/contrib/tools/ragel5/common/ya.make b/contrib/tools/ragel5/common/ya.make
new file mode 100644
index 0000000000..7448cd2af3
--- /dev/null
+++ b/contrib/tools/ragel5/common/ya.make
@@ -0,0 +1,20 @@
+# Build description for the code in contrib/tools/ragel5/common.
+LIBRARY()
+
+LICENSE(GPL-2.0-or-later)
+
+NO_UTIL()
+NO_COMPILER_WARNINGS()
+
+# Include path for this directory's headers, marked GLOBAL.
+ADDINCL(
+ GLOBAL contrib/tools/ragel5/common
+)
+
+# Depends on the aapl directory of the same ragel5 tree.
+PEERDIR(
+ contrib/tools/ragel5/aapl
+)
+
+# The only source file listed for this module.
+SRCS(
+ common.cpp
+)
+
+END()
diff --git a/contrib/tools/ragel5/ragel/fsmap.cpp b/contrib/tools/ragel5/ragel/fsmap.cpp
new file mode 100644
index 0000000000..551aea0391
--- /dev/null
+++ b/contrib/tools/ragel5/ragel/fsmap.cpp
@@ -0,0 +1,840 @@
+/*
+ * Copyright 2002-2004 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "fsmgraph.h"
+#include <iostream>
+using std::cerr;
+using std::endl;
+
+/* Global pointers to the condition data and the key operations. Both start
+ * out null, and functions in this file dereference them without checking
+ * (FsmAp::fillGaps reads keyOps->minKey and keyOps->maxKey, and
+ * FsmAp::addCondSpace reads condData->condSpaceMap), so they have to be
+ * assigned elsewhere before those functions run. */
+CondData *condData = 0;
+KeyOps *keyOps = 0;
+
+/* Add one (ordering, action) pair to this action table. */
+void ActionTable::setAction( int ordering, Action *action )
+{
+    /* A multi-insert is used so that a specific instance of an action can
+     * be present in a transition more than once. */
+    this->insertMulti( ordering, action );
+}
+
+/* Copy every entry of another action table into this table. */
+void ActionTable::setActions( const ActionTable &other )
+{
+    ActionTable::Iter src = other;
+    while ( src.lte() ) {
+        insertMulti( src->key, src->value );
+        src++;
+    }
+}
+
+/* Insert nActs actions from two parallel arrays, pairing orderings[i] with
+ * actions[i]. */
+void ActionTable::setActions( int *orderings, Action **actions, int nActs )
+{
+    int idx = 0;
+    while ( idx < nActs ) {
+        insertMulti( orderings[idx], actions[idx] );
+        idx += 1;
+    }
+}
+
+/* Return true when any entry of this table holds the given action pointer. */
+bool ActionTable::hasAction( Action *action )
+{
+    bool found = false;
+    for ( int idx = 0; !found && idx < length(); idx++ )
+        found = ( data[idx].value == action );
+    return found;
+}
+
+/* Add one (ordering, longest-match part) pair to this table. */
+void LmActionTable::setAction( int ordering, LongestMatchPart *action )
+{
+    /* A multi-insert is used so that a specific instance of an action can
+     * be present in a transition more than once. */
+    this->insertMulti( ordering, action );
+}
+
+/* Copy every entry of another longest-match action table into this table. */
+void LmActionTable::setActions( const LmActionTable &other )
+{
+    LmActionTable::Iter src = other;
+    while ( src.lte() ) {
+        insertMulti( src->key, src->value );
+        src++;
+    }
+}
+
+void ErrActionTable::setAction( int ordering, Action *action, int transferPoint )
+{
+ insertMulti( ErrActionTableEl( action, ordering, transferPoint ) );
+}
+
+void ErrActionTable::setActions( const ErrActionTable &other )
+{
+ for ( ErrActionTable::Iter act = other; act.lte(); act++ )
+ insertMulti( ErrActionTableEl( act->action, act->ordering, act->transferPoint ) );
+}
+
+/* Put a priority into this table. When the insert is refused because an
+ * element with the same key as desc is already present, the existing element
+ * is overwritten, but only if the new ordering is at least as large (later in
+ * time) as the stored one. */
+void PriorTable::setPrior( int ordering, PriorDesc *desc )
+{
+    PriorEl *existing = 0;
+
+    /* A non-null result means the element went in and nothing is left to do. */
+    if ( insert( PriorEl(ordering, desc), &existing ) != 0 )
+        return;
+
+    if ( ordering >= existing->ordering )
+        *existing = PriorEl( ordering, desc );
+}
+
+/* Apply every priority of another table to this one. Each entry goes through
+ * setPrior, so duplicates are resolved by that function's overwrite rule. */
+void PriorTable::setPriors( const PriorTable &other )
+{
+    for ( PriorTable::Iter src = other; src.lte(); src++ )
+        setPrior( src->ordering, src->desc );
+}
+
+/* Set the priority of the starting transitions. The start state is isolated
+ * first so that it has no other entry points, then the priority is set on
+ * every transition out of the start state that has a target state.
+ * Note: only transitions are touched. The start state's outPriorTable is not
+ * modified here, even when the start state is final. */
+void FsmAp::startFsmPrior( int ordering, PriorDesc *prior )
+{
+    isolateStartState();
+
+    TransList::Iter out = startState->outList;
+    while ( out.lte() ) {
+        /* Transitions without a target state are left alone. */
+        if ( out->toState != 0 )
+            out->priorTable.setPrior( ordering, prior );
+        out++;
+    }
+}
+
+/* Set the priority on every transition in the graph that has a target state,
+ * by walking the out list of each state. */
+void FsmAp::allTransPrior( int ordering, PriorDesc *prior )
+{
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        TransList::Iter out = st->outList;
+        while ( out.lte() ) {
+            if ( out->toState != 0 )
+                out->priorTable.setPrior( ordering, prior );
+            out++;
+        }
+        st++;
+    }
+}
+
+/* Set the priority on every transition that currently goes into a final
+ * state. If any entry states are final, transitions that may go into them in
+ * the future are not covered: the graph does not support pending in
+ * transitions the way it supports pending out transitions. */
+void FsmAp::finishFsmPrior( int ordering, PriorDesc *prior )
+{
+    StateSet::Iter fin = finStateSet;
+    while ( fin.lte() ) {
+        /* Every transition on the in list of this final state. */
+        TransInList::Iter in = (*fin)->inList;
+        while ( in.lte() ) {
+            in->priorTable.setPrior( ordering, prior );
+            in++;
+        }
+        fin++;
+    }
+}
+
+/* Record a priority for any out transitions that may later be made going out
+ * of this machine, by storing it in the outPriorTable of each final state. */
+void FsmAp::leaveFsmPrior( int ordering, PriorDesc *prior )
+{
+    StateSet::Iter fin = finStateSet;
+    while ( fin.lte() ) {
+        (*fin)->outPriorTable.setPrior( ordering, prior );
+        fin++;
+    }
+}
+
+
+/* Set an action to execute on the starting transitions. The start state is
+ * isolated first so that it has no other entry points, then the action is
+ * added to every transition out of the start state that has a target state.
+ * Note: only transitions are touched. The start state's own action tables
+ * are not modified here, even when the start state is final. */
+void FsmAp::startFsmAction( int ordering, Action *action )
+{
+    isolateStartState();
+
+    TransList::Iter out = startState->outList;
+    while ( out.lte() ) {
+        if ( out->toState != 0 )
+            out->actionTable.setAction( ordering, action );
+        out++;
+    }
+}
+
+/* Set an action on every transition in the graph that has a target state,
+ * by walking the out list of each state. */
+void FsmAp::allTransAction( int ordering, Action *action )
+{
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        TransList::Iter out = st->outList;
+        while ( out.lte() ) {
+            if ( out->toState != 0 )
+                out->actionTable.setAction( ordering, action );
+            out++;
+        }
+        st++;
+    }
+}
+
+/* Set an action to execute on entering final states, by adding it to every
+ * transition on the in list of each final state. If the start state is final
+ * there is no transition for the first entry into it, so the action really
+ * means "whenever a final state is entered from within the same fsm". */
+void FsmAp::finishFsmAction( int ordering, Action *action )
+{
+    StateSet::Iter fin = finStateSet;
+    while ( fin.lte() ) {
+        TransInList::Iter in = (*fin)->inList;
+        while ( in.lte() ) {
+            in->actionTable.setAction( ordering, action );
+            in++;
+        }
+        fin++;
+    }
+}
+
+/* Record an action for any out transitions that may later be made going out
+ * of this machine, by storing it in the outActionTable of each final state. */
+void FsmAp::leaveFsmAction( int ordering, Action *action )
+{
+    StateSet::Iter fin = finStateSet;
+    while ( fin.lte() ) {
+        (*fin)->outActionTable.setAction( ordering, action );
+        fin++;
+    }
+}
+
+/* Add a longest-match part, used for constructing scanners, to the
+ * lmActionTable of every transition that goes into a final state. */
+void FsmAp::longMatchAction( int ordering, LongestMatchPart *lmPart )
+{
+    StateSet::Iter fin = finStateSet;
+    while ( fin.lte() ) {
+        TransInList::Iter in = (*fin)->inList;
+        while ( in.lte() ) {
+            in->lmActionTable.setAction( ordering, lmPart );
+            in++;
+        }
+        fin++;
+    }
+}
+
+/* Make the out list of a state cover the whole key range from
+ * keyOps->minKey to keyOps->maxKey. Every range not covered by an existing
+ * transition gets a new transition attached with a null target (callers in
+ * this file treat toState == 0 as "goes to error"). Existing transitions are
+ * moved back into the out list in their original order. The gap detection
+ * relies on the out list being ordered by key range. */
+void FsmAp::fillGaps( StateAp *state )
+{
+ if ( state->outList.length() == 0 ) {
+ /* No transitions at all: one new transition covers the full range. */
+ attachNewTrans( state, 0, keyOps->minKey, keyOps->maxKey );
+ }
+ else {
+ /* Move the existing transitions aside; the out list is rebuilt below. */
+ TransList srcList;
+ srcList.transfer( state->outList );
+
+ /* Check for a gap at the beginning. */
+ TransList::Iter trans = srcList, next;
+ if ( keyOps->minKey < trans->lowKey ) {
+ /* The gap ends one key below the first transition's low key. */
+ Key highKey = trans->lowKey;
+ highKey.decrement();
+
+ attachNewTrans( state, 0, keyOps->minKey, highKey );
+ }
+
+ /* Put the first transition back. Its successor is fetched before the
+ * append (presumably because appending rewrites the element's list
+ * links -- confirm against the list implementation). */
+ next = trans.next();
+ state->outList.append( trans );
+
+ /* Keep the last high end. */
+ Key lastHigh = trans->highKey;
+
+ /* Loop over the remaining source ranges. */
+ for ( trans = next; trans.lte(); trans = next ) {
+ /* Make the next key following the last range. */
+ Key nextKey = lastHigh;
+ nextKey.increment();
+
+ /* Check for a gap from last up to here. */
+ if ( nextKey < trans->lowKey ) {
+ /* Make the high end of the range that fills the gap. */
+ Key highKey = trans->lowKey;
+ highKey.decrement();
+
+ attachNewTrans( state, 0, nextKey, highKey );
+ }
+
+ /* Put this transition back, again fetching its successor first. */
+ next = trans.next();
+ state->outList.append( trans );
+
+ /* Keep the last high end. */
+ lastHigh = trans->highKey;
+ }
+
+ /* Now check for a gap on the end to fill. */
+ if ( lastHigh < keyOps->maxKey ) {
+ /* The final gap starts one key above the last covered key. */
+ lastHigh.increment();
+
+ attachNewTrans( state, 0, lastHigh, keyOps->maxKey );
+ }
+ }
+}
+
+void FsmAp::setErrorAction( StateAp *state, int ordering, Action *action )
+{
+ /* Fill any gaps in the out list with an error transition. */
+ fillGaps( state );
+
+ /* Set error transitions in the transitions that go to error. */
+ for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
+ if ( trans->toState == 0 )
+ trans->actionTable.setAction( ordering, action );
+ }
+}
+
+
+/* Give a target state for error transitions. */
+void FsmAp::setErrorTarget( StateAp *state, StateAp *target, int *orderings,
+ Action **actions, int nActs )
+{
+ /* Fill any gaps in the out list with an error transition. */
+ fillGaps( state );
+
+ /* Set error target in the transitions that go to error. */
+ for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
+ if ( trans->toState == 0 ) {
+ /* The trans goes to error, redirect it. */
+ redirectErrorTrans( trans->fromState, target, trans );
+ trans->actionTable.setActions( orderings, actions, nActs );
+ }
+ }
+}
+
+void FsmAp::transferErrorActions( StateAp *state, int transferPoint )
+{
+ for ( int i = 0; i < state->errActionTable.length(); ) {
+ ErrActionTableEl *act = state->errActionTable.data + i;
+ if ( act->transferPoint == transferPoint ) {
+ /* Transfer the error action and remove it. */
+ setErrorAction( state, act->ordering, act->action );
+ state->errActionTable.vremove( i );
+ }
+ else {
+ /* Not transfering and deleting, skip over the item. */
+ i += 1;
+ }
+ }
+}
+
+/* Set error actions in the start state. */
+void FsmAp::startErrorAction( int ordering, Action *action, int transferPoint )
+{
+ /* Make sure the start state has no other entry points. */
+ isolateStartState();
+
+ /* Add the actions. */
+ startState->errActionTable.setAction( ordering, action, transferPoint );
+}
+
+/* Set error actions in all states where there is a transition out. */
+void FsmAp::allErrorAction( int ordering, Action *action, int transferPoint )
+{
+ /* Insert actions in the error action table of all states. */
+ for ( StateList::Iter state = stateList; state.lte(); state++ )
+ state->errActionTable.setAction( ordering, action, transferPoint );
+}
+
+/* Set error actions in final states. */
+void FsmAp::finalErrorAction( int ordering, Action *action, int transferPoint )
+{
+ /* Add the action to the error table of final states. */
+ for ( StateSet::Iter state = finStateSet; state.lte(); state++ )
+ (*state)->errActionTable.setAction( ordering, action, transferPoint );
+}
+
+void FsmAp::notStartErrorAction( int ordering, Action *action, int transferPoint )
+{
+ for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+ if ( state != startState )
+ state->errActionTable.setAction( ordering, action, transferPoint );
+ }
+}
+
+void FsmAp::notFinalErrorAction( int ordering, Action *action, int transferPoint )
+{
+ for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+ if ( ! state->isFinState() )
+ state->errActionTable.setAction( ordering, action, transferPoint );
+ }
+}
+
+/* Set error actions in the states that have transitions into a final state. */
+void FsmAp::middleErrorAction( int ordering, Action *action, int transferPoint )
+{
+ /* Isolate the start state in case it is reachable from in inside the
+ * machine, in which case we don't want it set. */
+ for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+ if ( state != startState && ! state->isFinState() )
+ state->errActionTable.setAction( ordering, action, transferPoint );
+ }
+}
+
+/* Add an EOF action to the start state. The start state is isolated first so
+ * that it has no other entry points. */
+void FsmAp::startEOFAction( int ordering, Action *action )
+{
+    isolateStartState();
+    startState->eofActionTable.setAction( ordering, action );
+}
+
+/* Add an EOF action to the EOF action table of every state. */
+void FsmAp::allEOFAction( int ordering, Action *action )
+{
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        st->eofActionTable.setAction( ordering, action );
+        st++;
+    }
+}
+
+/* Add an EOF action to the EOF action table of every final state. */
+void FsmAp::finalEOFAction( int ordering, Action *action )
+{
+    StateSet::Iter fin = finStateSet;
+    while ( fin.lte() ) {
+        (*fin)->eofActionTable.setAction( ordering, action );
+        fin++;
+    }
+}
+
+/* Add an EOF action to every state except the start state. */
+void FsmAp::notStartEOFAction( int ordering, Action *action )
+{
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        if ( st != startState )
+            st->eofActionTable.setAction( ordering, action );
+        st++;
+    }
+}
+
+/* Add an EOF action to every state that is not a final state. */
+void FsmAp::notFinalEOFAction( int ordering, Action *action )
+{
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        if ( ! st->isFinState() )
+            st->eofActionTable.setAction( ordering, action );
+        st++;
+    }
+}
+
+/* Add an EOF action to every state that is neither the start state nor a
+ * final state. */
+void FsmAp::middleEOFAction( int ordering, Action *action )
+{
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        if ( st != startState ) {
+            if ( ! st->isFinState() )
+                st->eofActionTable.setAction( ordering, action );
+        }
+        st++;
+    }
+}
+
+/*
+ * Set To State Actions.
+ */
+
+/* Add a to-state action to the start state. The start state is isolated
+ * first so that it has no other entry points. */
+void FsmAp::startToStateAction( int ordering, Action *action )
+{
+    isolateStartState();
+
+    /* With the start state isolated, add the action to its table. */
+    startState->toStateActionTable.setAction( ordering, action );
+}
+
+/* Add a to-state action to every state. */
+void FsmAp::allToStateAction( int ordering, Action *action )
+{
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        st->toStateActionTable.setAction( ordering, action );
+        st++;
+    }
+}
+
+/* Add a to-state action to the to-state action table of every final state. */
+void FsmAp::finalToStateAction( int ordering, Action *action )
+{
+    StateSet::Iter fin = finStateSet;
+    while ( fin.lte() ) {
+        (*fin)->toStateActionTable.setAction( ordering, action );
+        fin++;
+    }
+}
+
+/* Add a to-state action to every state except the start state. */
+void FsmAp::notStartToStateAction( int ordering, Action *action )
+{
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        if ( st != startState )
+            st->toStateActionTable.setAction( ordering, action );
+        st++;
+    }
+}
+
+/* Add a to-state action to every state that is not a final state. */
+void FsmAp::notFinalToStateAction( int ordering, Action *action )
+{
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        if ( ! st->isFinState() )
+            st->toStateActionTable.setAction( ordering, action );
+        st++;
+    }
+}
+
+/* Add a to-state action to every state that is neither the start state nor
+ * a final state. */
+void FsmAp::middleToStateAction( int ordering, Action *action )
+{
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        if ( st != startState ) {
+            if ( ! st->isFinState() )
+                st->toStateActionTable.setAction( ordering, action );
+        }
+        st++;
+    }
+}
+
+/*
+ * Set From State Actions.
+ */
+
+/* Add a from-state action to the start state. The start state is isolated
+ * first so that it has no other entry points. */
+void FsmAp::startFromStateAction( int ordering, Action *action )
+{
+    isolateStartState();
+
+    /* With the start state isolated, add the action to its table. */
+    startState->fromStateActionTable.setAction( ordering, action );
+}
+
+/* Add a from-state action to every state. */
+void FsmAp::allFromStateAction( int ordering, Action *action )
+{
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        st->fromStateActionTable.setAction( ordering, action );
+        st++;
+    }
+}
+
+/* Add a from-state action to the from-state action table of every final
+ * state. */
+void FsmAp::finalFromStateAction( int ordering, Action *action )
+{
+    StateSet::Iter fin = finStateSet;
+    while ( fin.lte() ) {
+        (*fin)->fromStateActionTable.setAction( ordering, action );
+        fin++;
+    }
+}
+
+/* Add a from-state action to every state except the start state. */
+void FsmAp::notStartFromStateAction( int ordering, Action *action )
+{
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        if ( st != startState )
+            st->fromStateActionTable.setAction( ordering, action );
+        st++;
+    }
+}
+
+/* Add a from-state action to every state that is not a final state. */
+void FsmAp::notFinalFromStateAction( int ordering, Action *action )
+{
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        if ( ! st->isFinState() )
+            st->fromStateActionTable.setAction( ordering, action );
+        st++;
+    }
+}
+
+/* Add a from-state action to every state that is neither the start state
+ * nor a final state. */
+void FsmAp::middleFromStateAction( int ordering, Action *action )
+{
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        if ( st != startState ) {
+            if ( ! st->isFinState() )
+                st->fromStateActionTable.setAction( ordering, action );
+        }
+        st++;
+    }
+}
+
+/* Renumber the action ordering keys on the transitions out of the start
+ * state. On each transition the keys are rewritten to fromOrder,
+ * fromOrder+1, ... in table order. Returns the largest number of order
+ * values used by any single transition. */
+int FsmAp::shiftStartActionOrder( int fromOrder )
+{
+    int maxUsed = 0;
+
+    TransList::Iter out = startState->outList;
+    while ( out.lte() ) {
+        /* The numbering restarts at fromOrder for every transition. */
+        int nextOrder = fromOrder;
+        ActionTable::Iter act = out->actionTable;
+        while ( act.lte() ) {
+            act->key = nextOrder++;
+            act++;
+        }
+
+        /* Track the widest span of order values seen so far. */
+        const int used = nextOrder - fromOrder;
+        if ( used > maxUsed )
+            maxUsed = used;
+
+        out++;
+    }
+
+    return maxUsed;
+}
+
+/* Remove all priorities: empty the out priority table of every state and the
+ * priority table of every transition on each state's out list. */
+void FsmAp::clearAllPriorities()
+{
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        st->outPriorTable.empty();
+
+        TransList::Iter out = st->outList;
+        while ( out.lte() ) {
+            out->priorTable.empty();
+            out++;
+        }
+
+        st++;
+    }
+}
+
+/* Zero out every action ordering key in the graph. This may be called before
+ * minimization when it is known that no more fsm operations will be done; it
+ * achieves greater reduction because states are no longer separated on the
+ * basis of action ordering. */
+void FsmAp::nullActionKeys( )
+{
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        /* Tables kept on the out transitions of the state. */
+        TransList::Iter out = st->outList;
+        while ( out.lte() ) {
+            /* Regular action table of the transition. */
+            ActionTable::Iter act = out->actionTable;
+            while ( act.lte() ) {
+                act->key = 0;
+                act++;
+            }
+
+            /* Longest-match action table of the transition. */
+            LmActionTable::Iter lmAct = out->lmActionTable;
+            while ( lmAct.lte() ) {
+                lmAct->key = 0;
+                lmAct++;
+            }
+
+            out++;
+        }
+
+        /* To-state action table. */
+        ActionTable::Iter toAct = st->toStateActionTable;
+        while ( toAct.lte() ) {
+            toAct->key = 0;
+            toAct++;
+        }
+
+        /* From-state action table. */
+        ActionTable::Iter fromAct = st->fromStateActionTable;
+        while ( fromAct.lte() ) {
+            fromAct->key = 0;
+            fromAct++;
+        }
+
+        /* Out (leaving) action table. */
+        ActionTable::Iter outAct = st->outActionTable;
+        while ( outAct.lte() ) {
+            outAct->key = 0;
+            outAct++;
+        }
+
+        /* Error action table; its ordering lives in the ordering member. */
+        ErrActionTable::Iter errAct = st->errActionTable;
+        while ( errAct.lte() ) {
+            errAct->ordering = 0;
+            errAct++;
+        }
+
+        /* EOF action table. */
+        ActionTable::Iter eofAct = st->eofActionTable;
+        while ( eofAct.lte() ) {
+            eofAct->key = 0;
+            eofAct++;
+        }
+
+        st++;
+    }
+}
+
+/* Consistency check over the state list, done with assert. Non-final states
+ * must carry no leaving data (out actions, out conditions, out priorities),
+ * the SB_BOTH state bits must be clear, and every state must have at least
+ * one foreign in transition. */
+void FsmAp::verifyStates()
+{
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        if ( ( st->stateBits & SB_ISFINAL ) == 0 ) {
+            /* Leaving data only makes sense on final states. */
+            assert( st->outActionTable.length() == 0 );
+            assert( st->outCondSet.length() == 0 );
+            assert( st->outPriorTable.length() == 0 );
+        }
+
+        /* Bits used temporarily by algorithms must have been reset. */
+        assert( (st->stateBits & SB_BOTH) == 0 );
+        assert( st->foreignInTrans > 0 );
+
+        st++;
+    }
+}
+
+/* Compare two priority tables. Both tables are scanned concurrently in key
+ * order looking for a key present in both with differing priorities. Returns
+ * -1 or 1 at the first such key, according to which table has the lower or
+ * higher priority, and 0 when no shared key differs. */
+int FsmAp::comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 )
+{
+    PriorTable::Iter left = priorTable1;
+    PriorTable::Iter right = priorTable2;
+
+    while ( left.lte() && right.lte() ) {
+        if ( left->desc->key < right->desc->key ) {
+            /* Key only on the left so far: move the left side forward. */
+            left.increment();
+            continue;
+        }
+        if ( left->desc->key > right->desc->key ) {
+            /* Key only on the right so far: move the right side forward. */
+            right.increment();
+            continue;
+        }
+
+        /* Same key on both sides: the priorities decide. */
+        if ( left->desc->priority < right->desc->priority )
+            return -1;
+        if ( left->desc->priority > right->desc->priority )
+            return 1;
+
+        /* Equal key and priority: advance both. */
+        left.increment();
+        right.increment();
+    }
+
+    /* No differing priorities on the same key. */
+    return 0;
+}
+
+/* Compare the user data embedded in two transitions: first the priority
+ * tables, then the longest-match action tables, then the action tables. The
+ * first non-zero comparison result is returned. The pointers must not be
+ * null; to-state and from-state are not considered. */
+int FsmAp::compareTransData( TransAp *trans1, TransAp *trans2 )
+{
+    int result = CmpPriorTable::compare(
+            trans1->priorTable, trans2->priorTable );
+
+    if ( result == 0 ) {
+        result = CmpLmActionTable::compare(
+                trans1->lmActionTable, trans2->lmActionTable );
+    }
+
+    if ( result == 0 ) {
+        result = CmpActionTable::compare(
+                trans1->actionTable, trans2->actionTable );
+    }
+
+    return result;
+}
+
+/* Callback used during merging: draw the properties of srcTrans into
+ * destTrans. It is called once for each transition that a new (duplicate or
+ * combined) transition is to represent, and srcTrans may be destTrans
+ * itself. */
+void FsmAp::addInTrans( TransAp *destTrans, TransAp *srcTrans )
+{
+    if ( srcTrans != destTrans ) {
+        /* Distinct transitions: take the actions and the priorities. */
+        destTrans->lmActionTable.setActions( srcTrans->lmActionTable );
+        destTrans->actionTable.setActions( srcTrans->actionTable );
+        destTrans->priorTable.setPriors( srcTrans->priorTable );
+        return;
+    }
+
+    /* Adding a transition into itself. The tables are copied first so that
+     * the insert does not read from the table it is writing to. The
+     * priorities are not copied in as that would have no effect. */
+    LmActionTable lmCopy( srcTrans->lmActionTable );
+    ActionTable actCopy( srcTrans->actionTable );
+    destTrans->lmActionTable.setActions( lmCopy );
+    destTrans->actionTable.setActions( actCopy );
+}
+
+/* Compare the user-embedded properties of two states. The tables are
+ * compared in this order and the first non-zero result is returned: out
+ * priorities, to-state actions, from-state actions, out actions, out
+ * condition set, error actions, EOF actions. */
+int FsmAp::compareStateData( const StateAp *state1, const StateAp *state2 )
+{
+    int result = CmpPriorTable::compare(
+            state1->outPriorTable, state2->outPriorTable );
+
+    if ( result == 0 ) {
+        result = CmpActionTable::compare(
+                state1->toStateActionTable, state2->toStateActionTable );
+    }
+
+    if ( result == 0 ) {
+        result = CmpActionTable::compare(
+                state1->fromStateActionTable, state2->fromStateActionTable );
+    }
+
+    if ( result == 0 ) {
+        result = CmpActionTable::compare(
+                state1->outActionTable, state2->outActionTable );
+    }
+
+    if ( result == 0 ) {
+        result = CmpActionSet::compare(
+                state1->outCondSet, state2->outCondSet );
+    }
+
+    if ( result == 0 ) {
+        result = CmpErrActionTable::compare(
+                state1->errActionTable, state2->errActionTable );
+    }
+
+    if ( result == 0 ) {
+        result = CmpActionTable::compare(
+                state1->eofActionTable, state2->eofActionTable );
+    }
+
+    return result;
+}
+
+
+/* Invoked when a state loses its final state status: the data embedded for
+ * leaving transitions (out actions, out conditions, out priorities) is
+ * emptied. */
+void FsmAp::clearOutData( StateAp *state )
+{
+    StateAp &st = *state;
+    st.outActionTable.empty();
+    st.outCondSet.empty();
+    st.outPriorTable.empty();
+}
+
+/* Return true when the state carries any leaving data: a non-empty out
+ * action table, out condition set or out priority table. */
+bool FsmAp::hasOutData( StateAp *state )
+{
+    if ( state->outActionTable.length() > 0 )
+        return true;
+    if ( state->outCondSet.length() > 0 )
+        return true;
+    return state->outPriorTable.length() > 0;
+}
+
+/*
+ * Setting Conditions.
+ */
+
+
+void logNewExpansion( Expansion *exp );
+void logCondSpace( CondSpace *condSpace );
+
+/* Return the condition space for a condition set, creating and registering
+ * it in condData->condSpaceMap when it does not exist yet. A new space takes
+ * the current condData->nextCondKey as its base key, and nextCondKey is then
+ * advanced by (2 to the power of the set size) times the alphabet size. */
+CondSpace *FsmAp::addCondSpace( const CondSet &condSet )
+{
+    CondSpace *found = condData->condSpaceMap.find( condSet );
+    if ( found != 0 )
+        return found;
+
+    /* Reserve the key range for the new space. */
+    Key baseKey = condData->nextCondKey;
+    condData->nextCondKey += (1 << condSet.length() ) * keyOps->alphSize();
+
+    CondSpace *condSpace = new CondSpace( condSet );
+    condSpace->baseKey = baseKey;
+    condData->condSpaceMap.insert( condSpace );
+
+    #ifdef LOG_CONDS
+    cerr << "adding new condition space" << endl;
+    cerr << " condition set: ";
+    logCondSpace( condSpace );
+    cerr << endl;
+    cerr << " baseKey: " << baseKey.getVal() << endl;
+    #endif
+
+    return condSpace;
+}
+
+/* Embed a condition in the start state. The start state is isolated first so
+ * that it has no other entry points. */
+void FsmAp::startFsmCondition( Action *condAction )
+{
+    isolateStartState();
+
+    /* With the start state isolated, embed the condition in it. */
+    embedCondition( startState, condAction );
+}
+
+/* Embed a condition in every state of the graph. */
+void FsmAp::allTransCondition( Action *condAction )
+{
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        embedCondition( st, condAction );
+        st++;
+    }
+}
+
+/* Record a leaving condition by inserting it into the out condition set of
+ * every final state. */
+void FsmAp::leaveFsmCondition( Action *condAction )
+{
+    StateSet::Iter fin = finStateSet;
+    while ( fin.lte() ) {
+        (*fin)->outCondSet.insert( condAction );
+        fin++;
+    }
+}
diff --git a/contrib/tools/ragel5/ragel/fsmattach.cpp b/contrib/tools/ragel5/ragel/fsmattach.cpp
new file mode 100644
index 0000000000..6a90df658a
--- /dev/null
+++ b/contrib/tools/ragel5/ragel/fsmattach.cpp
@@ -0,0 +1,425 @@
+/*
+ * Copyright 2001 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <string.h>
+#include <assert.h>
+#include "fsmgraph.h"
+
+#include <iostream>
+using namespace std;
+
+/* Insert a transition at the front of an inlist. The head must be supplied,
+ * by reference, so that it can be updated to point at trans. When from and to
+ * are different states the transition is foreign to its target, so the
+ * target's foreign in-transition count is incremented; with misfit accounting
+ * on, a target about to gain its first foreign in transition is moved from
+ * the misfit list to the main state list. */
+void FsmAp::attachToInList( StateAp *from, StateAp *to,
+		TransAp *&head, TransAp *trans )
+{
+	trans->ilnext = head;
+	trans->ilprev = 0;
+
+	/* If in trans list is not empty, set the head->prev to trans. */
+	if ( head != 0 )
+		head->ilprev = trans;
+
+	/* Now insert ourselves at the front of the list. */
+	head = trans;
+
+	/* Keep track of foreign transitions for from and to. */
+	if ( from != to ) {
+		if ( misfitAccounting ) {
+			/* If the number of foreign in transitions is about to go up to 1 then
+			 * move it from the misfit list to the main list. */
+			if ( to->foreignInTrans == 0 )
+				stateList.append( misfitList.detach( to ) );
+		}
+
+		to->foreignInTrans += 1;
+	}
+}
+
+/* Unlink a transition from an inlist. The head of the inlist must be
+ * supplied by reference; it is moved on when trans is the first element.
+ * When from and to are different states the target's foreign in-transition
+ * count is decremented and, with misfit accounting on, a target whose count
+ * reaches zero is moved from the main state list to the misfit list. */
+void FsmAp::detachFromInList( StateAp *from, StateAp *to,
+		TransAp *&head, TransAp *trans )
+{
+	TransAp *before = trans->ilprev;
+	TransAp *after = trans->ilnext;
+
+	/* Bypass trans on the forward links, or advance the head. */
+	if ( before != 0 )
+		before->ilnext = after;
+	else
+		head = after;
+
+	/* Bypass trans on the backward links. */
+	if ( after != 0 )
+		after->ilprev = before;
+
+	/* A self loop is not a foreign transition, nothing more to account. */
+	if ( from == to )
+		return;
+
+	to->foreignInTrans -= 1;
+
+	/* Last foreign in transition gone: the state becomes a misfit. */
+	if ( misfitAccounting && to->foreignInTrans == 0 )
+		misfitList.append( stateList.detach( to ) );
+}
+
+/* Allocate a new transition from one state to another over the key range
+ * lowKey..highKey. The transition is appended to from's out list and, when
+ * to is not null, put on to's in list. A null to leaves the transition
+ * without a target state. Returns the new transition. */
+TransAp *FsmAp::attachNewTrans( StateAp *from, StateAp *to, Key lowKey, Key highKey )
+{
+	TransAp *created = new TransAp();
+
+	/* Record both ends of the transition. */
+	created->fromState = from;
+	created->toState = to;
+
+	/* Hook into the source's out list, then fill in the key range. */
+	from->outList.append( created );
+	created->lowKey = lowKey;
+	created->highKey = highKey;
+
+	/* No target means there is no in list to join. */
+	if ( to == 0 )
+		return created;
+
+	attachToInList( from, to, to->inList.head, created );
+	return created;
+}
+
+/* Attach an already allocated, currently unattached transition so that it
+ * runs from one state to another. The transition must have neither a
+ * fromState nor a toState set. It is put on to's in list when to is not
+ * null. The out list of from is not touched. */
+void FsmAp::attachTrans( StateAp *from, StateAp *to, TransAp *trans )
+{
+	assert( trans->fromState == 0 && trans->toState == 0 );
+	trans->toState = to;
+	trans->fromState = from;
+
+	/* Without a target there is no in list to join. */
+	if ( to == 0 )
+		return;
+
+	attachToInList( from, to, to->inList.head, trans );
+}
+
+/* Point a transition that currently has no target at some state. Works like
+ * attachTrans, except that fromState must already be set and is left alone;
+ * only the target side is attached. */
+void FsmAp::redirectErrorTrans( StateAp *from, StateAp *to, TransAp *trans )
+{
+	assert( trans->fromState != 0 && trans->toState == 0 );
+	trans->toState = to;
+
+	/* Without a target there is no in list to join. */
+	if ( to == 0 )
+		return;
+
+	attachToInList( from, to, to->inList.head, trans );
+}
+
+/* Detach a transition from both of its states. from and to must be the
+ * states the transition is currently attached to. Both state pointers in the
+ * transition are cleared and, when to is not null, the transition is taken
+ * off to's in list. The out list of from is not touched. */
+void FsmAp::detachTrans( StateAp *from, StateAp *to, TransAp *trans )
+{
+	assert( trans->fromState == from && trans->toState == to );
+	trans->toState = 0;
+	trans->fromState = 0;
+
+	/* Without a target there is no in list to leave. */
+	if ( to == 0 )
+		return;
+
+	detachFromInList( from, to, to->inList.head, trans );
+}
+
+
+/* Detach a state from the graph. Detaches and deletes transitions in and out
+ * of the state. Empties inList and outList. Removes the state from the final
+ * state set. A detached state becomes useless and should be deleted.
+ * Every entry point that maps to the state is removed as well. The state
+ * object itself is not deleted here and its final state bit is not cleared. */
+void FsmAp::detachState( StateAp *state )
+{
+ /* Detach the in transitions from the inList list of transitions. Each
+ * pass removes the current head, so the loop ends when the list is empty. */
+ while ( state->inList.head != 0 ) {
+ /* Get pointers to the trans and the state. */
+ TransAp *trans = state->inList.head;
+ StateAp *fromState = trans->fromState;
+
+ /* Detach the transitions from the source state. */
+ detachTrans( fromState, state, trans );
+
+ /* Take the transition off the source state's out list, then it is ok to
+ * delete the transition. */
+ fromState->outList.detach( trans );
+ delete trans;
+ }
+
+ /* Remove the entry points in on the machine. Each unsetEntry call is
+ * expected to remove one id from entryIds, which ends the loop. */
+ while ( state->entryIds.length() > 0 )
+ unsetEntry( state->entryIds[0], state );
+
+ /* Detach out range transitions. The next position is taken before the
+ * delete because the current transition is freed inside the loop. */
+ for ( TransList::Iter trans = state->outList; trans.lte(); ) {
+ TransList::Iter next = trans.next();
+ detachTrans( state, trans->toState, trans );
+ delete trans;
+ trans = next;
+ }
+
+ /* Delete all of the out range pointers. The transitions themselves were
+ * already deleted in the loop above. */
+ state->outList.abandon();
+
+ /* Unset final stateness before detaching from graph. */
+ if ( state->stateBits & SB_ISFINAL )
+ finStateSet.remove( state );
+}
+
+
+/* Make a copy of srcTrans that leaves from the given state. A fresh
+ * transition is allocated, attached from that state to the destination of
+ * srcTrans, and the data of srcTrans is then added in through addInTrans.
+ * The copy is not put on any out list and its keys are not set; that is
+ * left to the caller. Returns the new transition. */
+TransAp *FsmAp::dupTrans( StateAp *from, TransAp *srcTrans )
+{
+	TransAp *copy = new TransAp();
+	StateAp *target = srcTrans->toState;
+
+	/* A fresh transition is unattached, so it can be attached directly. */
+	attachTrans( from, target, copy );
+
+	/* Bring over the data of the source transition. */
+	addInTrans( copy, srcTrans );
+
+	return copy;
+}
+
+/* In crossing, src trans and dest trans both go to existing states. Make one
+ * state from the sets of states that src and dest trans go to.
+ * Always returns destTrans. When both transitions already go to the same
+ * state only the src data is added in; otherwise destTrans is re-attached to
+ * the state that stands for the combined state set. Both toState pointers
+ * are dereferenced without a null check in that second case, so the caller
+ * must pass transitions that go somewhere. */
+TransAp *FsmAp::fsmAttachStates( MergeData &md, StateAp *from,
+ TransAp *destTrans, TransAp *srcTrans )
+{
+ /* The priorities are equal. We must merge the transitions. Does the
+ * existing trans go to the state we are to attach to? ie, are we to
+ * simply double up the transition? */
+ StateAp *toState = srcTrans->toState;
+ StateAp *existingState = destTrans->toState;
+
+ if ( existingState == toState ) {
+ /* The transition is a double up to the same state. Copy the src
+ * trans into itself. We don't need to merge in the from out trans
+ * data, that was done already. */
+ addInTrans( destTrans, srcTrans );
+ }
+ else {
+ /* The trans is not a double up. Dest trans cannot be the same as src
+ * trans. Set up the state set. */
+ StateSet stateSet;
+
+ /* We go to all the states the existing trans goes to, plus... A state
+ * with a dict element stands for a set of states already; use that set. */
+ if ( existingState->stateDictEl == 0 )
+ stateSet.insert( existingState );
+ else
+ stateSet.insert( existingState->stateDictEl->stateSet );
+
+ /* ... all the states that we have been told to go to. */
+ if ( toState->stateDictEl == 0 )
+ stateSet.insert( toState );
+ else
+ stateSet.insert( toState->stateDictEl->stateSet );
+
+ /* Look for the state. If it is not there already, make it. */
+ StateDictEl *lastFound;
+ if ( md.stateDict.insert( stateSet, &lastFound ) ) {
+ /* Make a new state representing the combination of states in
+ * stateSet. It gets added to the fill list. This means that we
+ * need to fill in its transitions sometime in the future. We
+ * don't do that now (ie, do not recurse). */
+ StateAp *combinState = addState();
+
+ /* Link up the dict element and the state. */
+ lastFound->targState = combinState;
+ combinState->stateDictEl = lastFound;
+
+ /* Add to the fill list. */
+ md.fillListAppend( combinState );
+ }
+
+ /* Get the target state: the one just created, or the one already in the
+ * dictionary. lastFound is read in both cases, so the insert call is
+ * relied upon to set it even when nothing was inserted. */
+ StateAp *targ = lastFound->targState;
+
+ /* Detach the state from existing state. */
+ detachTrans( from, existingState, destTrans );
+
+ /* Re-attach to the new target. */
+ attachTrans( from, targ, destTrans );
+
+ /* Add in src trans to the existing transition that we redirected to
+ * the new state. We don't need to merge in the from out trans data,
+ * that was done already. */
+ addInTrans( destTrans, srcTrans );
+ }
+
+ return destTrans;
+}
+
+/* Cross two transitions, taking care of either one having no target state.
+ * The result is always destTrans, with the data of srcTrans added in. */
+TransAp *FsmAp::mergeTrans( MergeData &md, StateAp *from,
+		TransAp *destTrans, TransAp *srcTrans )
+{
+	/* Src goes nowhere. Whether or not dest goes somewhere, its target
+	 * stays as it is and the src data is simply added in. */
+	if ( srcTrans->toState == 0 ) {
+		addInTrans( destTrans, srcTrans );
+		return destTrans;
+	}
+
+	/* Src goes somewhere but dest does not: detach dest and reattach it to
+	 * the target of src, then add the src data in. */
+	if ( destTrans->toState == 0 ) {
+		detachTrans( from, destTrans->toState, destTrans );
+		attachTrans( from, srcTrans->toState, destTrans );
+		addInTrans( destTrans, srcTrans );
+		return destTrans;
+	}
+
+	/* Both go somewhere, run the actual cross. */
+	return fsmAttachStates( md, from, destTrans, srcTrans );
+}
+
+/* Cross destTrans with srcTrans according to their priorities and return the
+ * transition that results. A higher priority dest is returned untouched. A
+ * higher priority src replaces dest: dest is detached and a copy of src is
+ * returned. Equal priorities cause the two to be merged. */
+TransAp *FsmAp::crossTransitions( MergeData &md, StateAp *from,
+		TransAp *destTrans, TransAp *srcTrans )
+{
+	const int cmp = comparePrior( destTrans->priorTable, srcTrans->priorTable );
+
+	/* Dest wins, src is ignored. */
+	if ( cmp > 0 )
+		return destTrans;
+
+	/* Src wins, it overwrites dest. */
+	if ( cmp < 0 ) {
+		detachTrans( from, destTrans->toState, destTrans );
+		return dupTrans( from, srcTrans );
+	}
+
+	/* Same priority, the transitions must be merged. */
+	return mergeTrans( md, from, destTrans, srcTrans );
+}
+
+/* Copy the transitions in srcList to the outlist of dest. The srcList should
+ * not be the outList of dest, otherwise you would be copying the contents of
+ * srcList into itself as it's iterated: bad news.
+ * The two lists are walked together with a PairIter and a new out list is
+ * built: ranges found only in dest are kept, ranges found only in src are
+ * duplicated, and overlapping ranges are crossed. The new list then replaces
+ * dest's out list. */
+void FsmAp::outTransCopy( MergeData &md, StateAp *dest, TransAp *srcList )
+{
+ /* The destination list. */
+ TransList destList;
+
+ /* Set up an iterator to stop at breaks. */
+ PairIter<TransAp> outPair( dest->outList.head, srcList );
+ for ( ; !outPair.end(); outPair++ ) {
+ switch ( outPair.userState ) {
+ case RangeInS1: {
+ /* The pair iter is the authority on the keys. It may have needed
+ * to break the dest range. */
+ TransAp *destTrans = outPair.s1Tel.trans;
+ destTrans->lowKey = outPair.s1Tel.lowKey;
+ destTrans->highKey = outPair.s1Tel.highKey;
+ destList.append( destTrans );
+ break;
+ }
+ case RangeInS2: {
+ /* Src range may get crossed with dest's default transition. */
+ TransAp *newTrans = dupTrans( dest, outPair.s2Tel.trans );
+
+ /* Set up the transition's keys and append to the dest list. */
+ newTrans->lowKey = outPair.s2Tel.lowKey;
+ newTrans->highKey = outPair.s2Tel.highKey;
+ destList.append( newTrans );
+ break;
+ }
+ case RangeOverlap: {
+ /* Exact overlap, cross them. The result may be the dest transition
+ * itself or a new copy of the src transition. */
+ TransAp *newTrans = crossTransitions( md, dest,
+ outPair.s1Tel.trans, outPair.s2Tel.trans );
+
+ /* Set up the transition's keys and append to the dest list. */
+ newTrans->lowKey = outPair.s1Tel.lowKey;
+ newTrans->highKey = outPair.s1Tel.highKey;
+ destList.append( newTrans );
+ break;
+ }
+ case BreakS1: {
+ /* Since we are always writing to the dest trans, the dest needs
+ * to be copied when it is broken. The copy goes into the first
+ * half of the break to "break it off". */
+ outPair.s1Tel.trans = dupTrans( dest, outPair.s1Tel.trans );
+ break;
+ }
+ case BreakS2: /* Nothing is done when the src range is broken. */
+ break;
+ }
+ }
+
+ /* Abandon the old outList and transfer destList into it. */
+ dest->outList.transfer( destList );
+}
+
+
+/* Move all the transitions that go into src so that they go into dest. The
+ * start state role and every entry point of src are handed over to dest as
+ * well. src and dest must be different states. */
+void FsmAp::inTransMove( StateAp *dest, StateAp *src )
+{
+	/* Do not try to move in trans to and from the same state. */
+	assert( dest != src );
+
+	/* If src is the start state, dest becomes the start state. */
+	if ( src == startState ) {
+		unsetStartState();
+		setStartState( dest );
+	}
+
+	/* Re-point every entry point of src at dest. changeEntry removes the id
+	 * from src->entryIds, so the set is drained from the front instead of
+	 * being walked with an iterator that the removal would invalidate (an
+	 * iterator walk skips ids once the set holds more than one). */
+	while ( src->entryIds.length() > 0 )
+		changeEntry( src->entryIds[0], dest, src );
+
+	/* Move the transitions in inList. Each pass takes the current head off
+	 * src's in list, so the loop ends when the list is empty. */
+	while ( src->inList.head != 0 ) {
+		/* Get trans and from state. */
+		TransAp *trans = src->inList.head;
+		StateAp *fromState = trans->fromState;
+
+		/* Detach from src, reattach to dest. */
+		detachTrans( fromState, src, trans );
+		attachTrans( fromState, dest, trans );
+	}
+}
diff --git a/contrib/tools/ragel5/ragel/fsmbase.cpp b/contrib/tools/ragel5/ragel/fsmbase.cpp
new file mode 100644
index 0000000000..f1d7141c09
--- /dev/null
+++ b/contrib/tools/ragel5/ragel/fsmbase.cpp
@@ -0,0 +1,598 @@
+/*
+ * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <string.h>
+#include <assert.h>
+#include "fsmgraph.h"
+
+/* Append a state to the end of the fill list, a singly linked list threaded
+ * through alg.next with stfillHead and stfillTail as its ends. */
+void MergeData::fillListAppend( StateAp *state )
+{
+	/* The new element is always the last one. */
+	state->alg.next = 0;
+
+	/* Link after the current tail, or start the list if it is empty. */
+	if ( stfillHead != 0 )
+		stfillTail->alg.next = state;
+	else
+		stfillHead = state;
+
+	stfillTail = state;
+}
+
+/* Graph constructor. Starts with no start state, no error state and misfit
+ * accounting off. Members not named in the initializer list are default
+ * constructed. */
+FsmAp::FsmAp()
+:
+ /* No start state and no error state. */
+ startState(0),
+ errState(0),
+
+ /* Misfit accounting is a switch, turned on only at specific times. It
+ * controls what happens when states have no way in from the outside
+ * world. */
+ misfitAccounting(false)
+{
+}
+
+/* Copy all graph data including transitions. Each state of graph is
+ * duplicated and the duplicate is recorded in the original's alg.stateMap;
+ * that map is then used to carry transition targets, entry points, the start
+ * state and the final state set over to the new states. Note that this
+ * writes alg.stateMap in the states of the source graph, that errState is set
+ * to 0 rather than copied, and that the start state pointer is dereferenced
+ * without a null check. */
+FsmAp::FsmAp( const FsmAp &graph )
+:
+ /* Lists start empty. Will be filled by copy. */
+ stateList(),
+ misfitList(),
+
+ /* Copy in the entry points,
+ * pointers will be resolved later. */
+ entryPoints(graph.entryPoints),
+ startState(graph.startState),
+ errState(0),
+
+ /* Will be filled by copy. */
+ finStateSet(),
+
+ /* Misfit accounting is only on during merging. */
+ misfitAccounting(false)
+{
+ /* Create the states and record their map in the original state. */
+ StateList::Iter origState = graph.stateList;
+ for ( ; origState.lte(); origState++ ) {
+ /* Make the new state. */
+ StateAp *newState = new StateAp( *origState );
+
+ /* Add the state to the list. */
+ stateList.append( newState );
+
+ /* Set the mapsTo item of the old state. */
+ origState->alg.stateMap = newState;
+ }
+
+ /* Dereference all the state maps. */
+ for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+ for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
+ /* The transition still points to the original target in the src
+ * machine. The target's duplicate is in the statemap. */
+ StateAp *toState = trans->toState != 0 ? trans->toState->alg.stateMap : 0;
+
+ /* Attach the transition to the duplicate. attachTrans asserts that
+ * toState is 0, so it is cleared first. */
+ trans->toState = 0;
+ attachTrans( state, toState, trans );
+ }
+ }
+
+ /* Fix the state pointers in the entry points array. */
+ EntryMapEl *eel = entryPoints.data;
+ for ( int e = 0; e < entryPoints.length(); e++, eel++ ) {
+ /* Get the duplicate of the state. */
+ eel->value = eel->value->alg.stateMap;
+
+ /* Foreign in transitions must be built up when duping machines so
+ * increment it here. */
+ eel->value->foreignInTrans += 1;
+ }
+
+ /* Fix the start state pointer and the new start state's count of in
+ * transitions. */
+ startState = startState->alg.stateMap;
+ startState->foreignInTrans += 1;
+
+ /* Build the final state set. */
+ StateSet::Iter st = graph.finStateSet;
+ for ( ; st.lte(); st++ )
+ finStateSet.insert((*st)->alg.stateMap);
+}
+
+/* Destructor. Empties the out list of every state on the state list, then
+ * empties the state list itself. */
+FsmAp::~FsmAp()
+{
+	/* First the transitions, state by state. */
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		st->outList.empty();
+		st++;
+	}
+
+	/* Then the states. */
+	stateList.empty();
+}
+
+/* Make a state final: set its SB_ISFINAL bit and add it to finStateSet.
+ * Does nothing when the state is final already. */
+void FsmAp::setFinState( StateAp *state )
+{
+	const bool alreadyFinal = ( state->stateBits & SB_ISFINAL ) != 0;
+	if ( !alreadyFinal ) {
+		state->stateBits |= SB_ISFINAL;
+		finStateSet.insert( state );
+	}
+}
+
+/* Make a state non-final: clear its SB_ISFINAL bit and remove it from the
+ * final state set. Does nothing when the state is not final. */
+void FsmAp::unsetFinState( StateAp *state )
+{
+	if ( state->stateBits & SB_ISFINAL ) {
+		/* A state that loses its final state status must give up the
+		 * properties that only final states may have. */
+		clearOutData( state );
+
+		state->stateBits &= ~ SB_ISFINAL;
+		finStateSet.remove( state );
+	}
+}
+
+/* Make a state the start state. There must be no start state set at the
+ * time of the call. Being the start state counts as one foreign in
+ * transition to the state. */
+void FsmAp::setStartState( StateAp *state )
+{
+	/* Must go from unset to set. */
+	assert( startState == 0 );
+	startState = state;
+
+	/* With misfit accounting on, a state about to get its first foreign in
+	 * transition leaves the misfit list for the main list. */
+	if ( misfitAccounting && state->foreignInTrans == 0 )
+		stateList.append( misfitList.detach( state ) );
+
+	state->foreignInTrans += 1;
+}
+
+/* Clear the start state. A start state must be set at the time of the call.
+ * The old start state loses the foreign in transition it was credited with
+ * for being the start state. */
+void FsmAp::unsetStartState()
+{
+	/* Must go from set to unset. */
+	assert( startState != 0 );
+
+	StateAp *old = startState;
+	old->foreignInTrans -= 1;
+
+	/* With misfit accounting on, a state that just lost its last foreign in
+	 * transition leaves the main list for the misfit list. */
+	if ( misfitAccounting && old->foreignInTrans == 0 )
+		misfitList.append( stateList.detach( old ) );
+
+	startState = 0;
+}
+
+/* Make a state a named entry point by associating an id with it. Nothing
+ * happens when the state is labelled with the id already. Otherwise the
+ * pair is recorded in the entry point map and the state is credited with one
+ * more foreign in transition. */
+void FsmAp::setEntry( int id, StateAp *state )
+{
+	/* Label the state. If the label was there already we are done. */
+	if ( ! state->entryIds.insert( id ) )
+		return;
+
+	/* Ids are not unique in the map, hence the multi insert. */
+	entryPoints.insertMulti( id, state );
+
+	/* With misfit accounting on, a state about to get its first foreign in
+	 * transition leaves the misfit list for the main list. */
+	if ( misfitAccounting && state->foreignInTrans == 0 )
+		stateList.append( misfitList.detach( state ) );
+
+	state->foreignInTrans += 1;
+}
+
+/* Remove the association of an id with a state. The state loses its entry
+ * point status for that id. Assumes that the id is indeed mapped to state;
+ * a violation of that now trips an assertion instead of scanning past the
+ * records for id. */
+void FsmAp::unsetEntry( int id, StateAp *state )
+{
+	/* Find the range of records for id. */
+	EntryMapEl *enLow = 0, *enHigh = 0;
+	entryPoints.findMulti( id, enLow, enHigh );
+	assert( enLow != 0 && enHigh != 0 );
+
+	/* Look for the record of this state. enHigh is the last record for id
+	 * (inclusive); staying inside the range keeps a broken precondition from
+	 * walking into the records of other ids or off the end of the table. */
+	while ( enLow <= enHigh && enLow->value != state )
+		enLow += 1;
+	assert( enLow <= enHigh );
+
+	/* Remove the record from the map. */
+	entryPoints.remove( enLow );
+
+	/* Remove the state's sense of the link. */
+	state->entryIds.remove( id );
+	state->foreignInTrans -= 1;
+	if ( misfitAccounting ) {
+		/* If the number of foreign in transitions just went down to 0 then take
+		 * it off the main list and put it on the misfit list. */
+		if ( state->foreignInTrans == 0 )
+			misfitList.append( stateList.detach( state ) );
+	}
+}
+
+/* Remove every association of an id with a state. Assumes that the id is
+ * mapped to at least one state. Each state found loses the id and one
+ * foreign in transition; the records are then removed from the map. */
+void FsmAp::unsetEntry( int id )
+{
+	/* Get the range of records for the id; last is inclusive. */
+	EntryMapEl *first = 0, *last = 0;
+	entryPoints.findMulti( id, first, last );
+
+	for ( EntryMapEl *cur = first; cur <= last; cur++ ) {
+		StateAp *st = cur->value;
+
+		/* Remove the state's sense of the link. */
+		st->entryIds.remove( id );
+		st->foreignInTrans -= 1;
+
+		/* With misfit accounting on, a state that just lost its last
+		 * foreign in transition leaves the main list for the misfit list. */
+		if ( misfitAccounting && st->foreignInTrans == 0 )
+			misfitList.append( stateList.detach( st ) );
+	}
+
+	/* Drop the whole range from the entry points map. */
+	entryPoints.removeMulti( first, last );
+}
+
+
+/* Re-point the entry point id from one state to another. The record in the
+ * entry point map is changed in place, from loses the id together with one
+ * foreign in transition, and to gains the id and, if it did not carry the id
+ * already, one foreign in transition. Assumes that id is mapped to from; a
+ * violation of that now trips an assertion instead of scanning past the
+ * records for id. */
+void FsmAp::changeEntry( int id, StateAp *to, StateAp *from )
+{
+	/* Find the range of records for id. */
+	EntryMapEl *enLow = 0, *enHigh = 0;
+	entryPoints.findMulti( id, enLow, enHigh );
+	assert( enLow != 0 && enHigh != 0 );
+
+	/* Look for the record of from. enHigh is the last record for id
+	 * (inclusive); staying inside the range keeps a broken precondition from
+	 * walking into the records of other ids or off the end of the table. */
+	while ( enLow <= enHigh && enLow->value != from )
+		enLow += 1;
+	assert( enLow <= enHigh );
+
+	/* Change it to the new target. */
+	enLow->value = to;
+
+	/* Remove from's sense of the link. */
+	from->entryIds.remove( id );
+	from->foreignInTrans -= 1;
+	if ( misfitAccounting ) {
+		/* If the number of foreign in transitions just went down to 0 then take
+		 * it off the main list and put it on the misfit list. */
+		if ( from->foreignInTrans == 0 )
+			misfitList.append( stateList.detach( from ) );
+	}
+
+	/* Add to's sense of the link. */
+	if ( to->entryIds.insert( id ) != 0 ) {
+		if ( misfitAccounting ) {
+			/* If the number of foreign in transitions is about to go up to 1 then
+			 * take it off the misfit list and put it on the head list. */
+			if ( to->foreignInTrans == 0 )
+				stateList.append( misfitList.detach( to ) );
+		}
+
+		/* Up the foreign in transitions to the state. */
+		to->foreignInTrans += 1;
+	}
+}
+
+
+/* Remove every entry point from the machine. Each state named in the entry
+ * point map loses all of its entry ids and as many foreign in transitions;
+ * the map is emptied afterwards. */
+void FsmAp::unsetAllEntryPoints()
+{
+	for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) {
+		StateAp *st = en->value;
+
+		/* A state that appears more than once in the map was already
+		 * cleared on its first visit. */
+		if ( st->entryIds.length() <= 0 )
+			continue;
+
+		/* Take off all of the state's entry points at once. */
+		st->foreignInTrans -= st->entryIds.length();
+
+		/* With misfit accounting on, a state that just lost its last
+		 * foreign in transition leaves the main list for the misfit list. */
+		if ( misfitAccounting && st->foreignInTrans == 0 )
+			misfitList.append( stateList.detach( st ) );
+
+		/* Forget the ids. */
+		st->entryIds.empty();
+	}
+
+	/* Finally clear the map itself. */
+	entryPoints.empty();
+}
+
+/* Append id to the epsilon transition list of every final state. */
+void FsmAp::epsilonTrans( int id )
+{
+	StateSet::Iter fin = finStateSet;
+	while ( fin.lte() ) {
+		StateAp *finState = *fin;
+		finState->epsilonTrans.append( id );
+		fin++;
+	}
+}
+
+/* Set SB_ISMARKED on the given state and, recursively, on every state that
+ * can be reached from it by following out transitions. States that are
+ * marked already are not visited again. */
+void FsmAp::markReachableFromHere( StateAp *state )
+{
+	/* Already processed, stop the recursion here. */
+	if ( state->stateBits & SB_ISMARKED )
+		return;
+
+	/* Mark before descending so that cycles terminate. */
+	state->stateBits |= SB_ISMARKED;
+
+	/* Visit the target of every out transition that has one. */
+	for ( TransList::Iter out = state->outList; out.lte(); out++ ) {
+		StateAp *target = out->toState;
+		if ( target == 0 )
+			continue;
+		markReachableFromHere( target );
+	}
+}
+
+/* Like markReachableFromHere, except that the walk does not descend into
+ * final states: the target of a transition is visited only when it is not
+ * final. The state passed in is marked regardless of whether it is final. */
+void FsmAp::markReachableFromHereStopFinal( StateAp *state )
+{
+	/* Already processed, stop the recursion here. */
+	if ( state->stateBits & SB_ISMARKED )
+		return;
+
+	/* Mark before descending so that cycles terminate. */
+	state->stateBits |= SB_ISMARKED;
+
+	/* Visit every non-final target. */
+	for ( TransList::Iter out = state->outList; out.lte(); out++ ) {
+		StateAp *target = out->toState;
+		if ( target == 0 || target->isFinState() )
+			continue;
+		markReachableFromHereStopFinal( target );
+	}
+}
+
+/* Set SB_ISMARKED on the given state and, recursively, on every state that
+ * has a path into it, by following in transitions backwards. States that
+ * are marked already are not visited again. */
+void FsmAp::markReachableFromHereReverse( StateAp *state )
+{
+	/* Already processed, stop the recursion here. */
+	if ( state->stateBits & SB_ISMARKED )
+		return;
+
+	/* Mark before descending so that cycles terminate. */
+	state->stateBits |= SB_ISMARKED;
+
+	/* Visit the source of every in transition. */
+	TransInList::Iter in = state->inList;
+	while ( in.lte() ) {
+		markReachableFromHereReverse( in->fromState );
+		in++;
+	}
+}
+
+/* Tell whether the start state is isolated, meaning that it has no in
+ * transitions and is not a named entry point. Setting starting transitions
+ * requires an isolated start state. */
+bool FsmAp::isStartStateIsolated()
+{
+	/* Any in transition or any entry id means the state is not isolated. */
+	return startState->inList.head == 0 &&
+			startState->entryIds.length() <= 0;
+}
+
+/* Add all of other's entry point records to this machine's entry point map.
+ * Only the map is touched here; the original comment notes that other's
+ * states are expected to be copied into this machine. */
+void FsmAp::copyInEntryPoints( FsmAp *other )
+{
+	/* Names are not unique, so a multi insert is needed. */
+	EntryMap::Iter src = other->entryPoints;
+	while ( src.lte() ) {
+		entryPoints.insertMulti( src->key, src->value );
+		src++;
+	}
+}
+
+
+/* Make every final state non-final: clear SB_ISFINAL on each member of the
+ * final state set, then empty the set. Unlike unsetFinState this does not
+ * call clearOutData on the states. */
+void FsmAp::unsetAllFinStates()
+{
+	StateSet::Iter fin = finStateSet;
+	while ( fin.lte() ) {
+		StateAp *finState = *fin;
+		finState->stateBits &= ~ SB_ISFINAL;
+		fin++;
+	}
+
+	finStateSet.empty();
+}
+
+/* Or the given bits into the stateBits of every final state. */
+void FsmAp::setFinBits( int finStateBits )
+{
+	StateSet::Iter fin = finStateSet;
+	while ( fin.lte() ) {
+		(*fin)->stateBits |= finStateBits;
+		fin++;
+	}
+}
+
+
+/* Debugging check of the transition lists: for every state on the state
+ * list, each transition on its out list must name the state as fromState and
+ * each transition on its in list must name it as toState. */
+void FsmAp::verifyIntegrity()
+{
+	for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+		{
+			/* The out list: fromState must be this state. */
+			TransList::Iter trans = state->outList;
+			while ( trans.lte() ) {
+				assert( trans->fromState == state );
+				trans++;
+			}
+		}
+
+		{
+			/* The in list: toState must be this state. */
+			TransInList::Iter trans = state->inList;
+			while ( trans.lte() ) {
+				assert( trans->toState == state );
+				trans++;
+			}
+		}
+	}
+}
+
+/* Debugging check that every state on the state list can be reached from
+ * the start state or from one of the entry points. The marks used for the
+ * check are cleared again on the way out. */
+void FsmAp::verifyReachability()
+{
+	/* Mark forward from the start state, then from each entry point. */
+	markReachableFromHere( startState );
+	EntryMap::Iter en = entryPoints;
+	while ( en.lte() ) {
+		markReachableFromHere( en->value );
+		en++;
+	}
+
+	/* Every state must carry the mark by now. Clear it as we go. */
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		assert( st->stateBits & SB_ISMARKED );
+		st->stateBits &= ~ SB_ISMARKED;
+		st++;
+	}
+}
+
+/* Debugging check that every state on the state list has a path to some
+ * final state. The start state is exempt from the check. The marks used for
+ * the check are cleared again on the way out. */
+void FsmAp::verifyNoDeadEndStates()
+{
+	/* Mark backwards from each final state. */
+	StateSet::Iter pst = finStateSet;
+	while ( pst.lte() ) {
+		markReachableFromHereReverse( *pst );
+		pst++;
+	}
+
+	/* The start state is marked by hand. This has to come AFTER the
+	 * recursive marking, which stops at states that are already marked. */
+	startState->stateBits |= SB_ISMARKED;
+
+	/* Every state must carry the mark by now. Clear it as we go. */
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		assert( st->stateBits & SB_ISMARKED );
+		st->stateBits &= ~ SB_ISMARKED;
+		st++;
+	}
+}
+
+/* Recursive step of the depth first ordering: append the state to the state
+ * list unless it is flagged SB_ONLIST already, then do the same for the
+ * target of each of its out transitions. */
+void FsmAp::depthFirstOrdering( StateAp *state )
+{
+	/* Already placed, nothing to do. */
+	if ( state->stateBits & SB_ONLIST )
+		return;
+
+	/* Place the state before its successors. */
+	state->stateBits |= SB_ONLIST;
+	stateList.append( state );
+
+	/* Descend into every transition that has a target. */
+	for ( TransList::Iter out = state->outList; out.lte(); out++ ) {
+		StateAp *target = out->toState;
+		if ( target == 0 )
+			continue;
+		depthFirstOrdering( target );
+	}
+}
+
+/* Ordering states by transition connections. */
+void FsmAp::depthFirstOrdering()
+{
+ /* Init on state list flags. */
+ for ( StateList::Iter st = stateList; st.lte(); st++ )
+ st->stateBits &= ~SB_ONLIST;
+
+ /* Clear out the state list, we will rebuild it. */
+ int stateListLen = stateList.length();
+ stateList.abandon();
+
+ /* Add back to the state list from the start state and all other entry
+ * points. */
+ if ( errState != 0 )
+ depthFirstOrdering( errState );
+ depthFirstOrdering( startState );
+ for ( EntryMap::Iter en = entryPoints; en.lte(); en++ )
+ depthFirstOrdering( en->value );
+
+ /* Make sure we put everything back on. */
+ assert( stateListLen == stateList.length() );
+}
+
+/* Stable sort the states by final state status. A single pass is made over
+ * the list and every final state met is detached and appended again at the
+ * tail. Non-final states keep their relative order at the front and final
+ * states keep theirs at the back. The tail at the time of the call is
+ * remembered so that the pass stops there and does not revisit the states it
+ * moved. */
+void FsmAp::sortStatesByFinal()
+{
+ /* Move forward through the list and throw final states onto the end. */
+ StateAp *state = 0;
+ StateAp *next = stateList.head;
+ StateAp *last = stateList.tail; /* Original tail, the last state to look at. */
+ while ( state != last ) {
+ /* Move forward and load up the next. The successor is read before the
+ * state can be moved to the end of the list. */
+ state = next;
+ next = state->next;
+
+ /* Throw to the end? */
+ if ( state->isFinState() ) {
+ stateList.detach( state );
+ stateList.append( state );
+ }
+ }
+}
+
+/* Number the states in state list order: the first state gets base and each
+ * following state gets the next integer. The number is stored in
+ * alg.stateNum. */
+void FsmAp::setStateNumbers( int base )
+{
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		st->alg.stateNum = base;
+		base += 1;
+		st++;
+	}
+}
+
+
+bool FsmAp::checkErrTrans( StateAp *state, TransAp *trans )
+{
+ /* Might go directly to error state. */
+ if ( trans->toState == 0 )
+ return true;
+
+ if ( trans->prev == 0 ) {
+ /* If this is the first transition. */
+ if ( keyOps->minKey < trans->lowKey )
+ return true;
+ }
+ else {
+ /* Not the first transition. Compare against the prev. */
+ TransAp *prev = trans->prev;
+ Key nextKey = prev->highKey;
+ nextKey.increment();
+ if ( nextKey < trans->lowKey )
+ return true;
+ }
+ return false;
+}
+
+bool FsmAp::checkErrTransFinish( StateAp *state )
+{
+ /* Check if there are any ranges already. */
+ if ( state->outList.length() == 0 )
+ return true;
+ else {
+ /* Get the last and check for a gap on the end. */
+ TransAp *last = state->outList.tail;
+ if ( last->highKey < keyOps->maxKey )
+ return true;
+ }
+ return 0;
+}
+
+bool FsmAp::hasErrorTrans()
+{
+ bool result;
+ for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+ for ( TransList::Iter tr = st->outList; tr.lte(); tr++ ) {
+ result = checkErrTrans( st, tr );
+ if ( result )
+ return true;
+ }
+ result = checkErrTransFinish( st );
+ if ( result )
+ return true;
+ }
+ return false;
+}
diff --git a/contrib/tools/ragel5/ragel/fsmgraph.cpp b/contrib/tools/ragel5/ragel/fsmgraph.cpp
new file mode 100644
index 0000000000..d7d0ba4fe2
--- /dev/null
+++ b/contrib/tools/ragel5/ragel/fsmgraph.cpp
@@ -0,0 +1,1426 @@
+/*
+ * Copyright 2001, 2002, 2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <assert.h>
+#include <iostream>
+
+#include "fsmgraph.h"
+#include "mergesort.h"
+#include "parsedata.h"
+
+using std::cerr;
+using std::endl;
+
+/* Allocate a new state and link it into this graph. While misfit accounting
+ * is on the state goes onto the misfit list (it has no foreign in
+ * transitions yet); otherwise it goes onto the regular state list. The new
+ * state is returned. */
+StateAp *FsmAp::addState()
+{
+    StateAp *fresh = new StateAp();
+
+    if ( !misfitAccounting )
+        stateList.append( fresh );
+    else
+        misfitList.append( fresh );
+
+    return fresh;
+}
+
+/* Build the machine that matches the key sequence str[0..len-1]. The result
+ * has len+1 states chained by one single-key transition per element of str;
+ * the first state is the start state and the last one is final. */
+void FsmAp::concatFsm( Key *str, int len )
+{
+    /* The chain begins at the start state. */
+    StateAp *tail = addState();
+    setStartState( tail );
+
+    /* Extend the chain by one state per key. */
+    int pos = 0;
+    while ( pos < len ) {
+        StateAp *target = addState();
+        attachNewTrans( tail, target, str[pos], str[pos] );
+        tail = target;
+        pos += 1;
+    }
+
+    /* Whatever state ends the chain accepts. */
+    setFinState( tail );
+}
+
+/* Case insensitive version of concatFsm. For every key in str a transition
+ * is drawn on the key itself and, when isLower()/isUpper() report a case, on
+ * the opposite-case key as well. */
+void FsmAp::concatFsmCI( Key *str, int len )
+{
+    /* Make the first state and set it as the start state. */
+    StateAp *last = addState();
+    setStartState( last );
+
+    /* Attach subsequent states. */
+    for ( int i = 0; i < len; i++ ) {
+        StateAp *newState = addState();
+
+        /* Collect the key together with its other-case variant, if any. */
+        KeySet keySet;
+        if ( str[i].isLower() )
+            keySet.insert( str[i].toUpper() );
+        if ( str[i].isUpper() )
+            keySet.insert( str[i].toLower() );
+        keySet.insert( str[i] );
+
+        /* One single-key transition per collected key. The index has its
+         * own name so that it no longer hides the outer loop's i. */
+        for ( int k = 0; k < keySet.length(); k++ )
+            attachNewTrans( last, newState, keySet[k], keySet[k] );
+
+        last = newState;
+    }
+
+    /* Make the last state the final state. */
+    setFinState( last );
+}
+
+/* Build the machine that matches exactly the one key chr: a start state, a
+ * final state and a single transition on chr between them. */
+void FsmAp::concatFsm( Key chr )
+{
+    /* First state is the start state. */
+    StateAp *entry = addState();
+    setStartState( entry );
+
+    /* Second state is the accepting one. */
+    StateAp *accept = addState();
+    setFinState( accept );
+
+    /* Link the two on the character. */
+    attachNewTrans( startState, accept, chr, chr );
+}
+
+/* Build the machine that matches any single key from set. The result has two
+ * states with len single-key transitions between them. The set must be in
+ * strictly ascending order (and therefore free of duplicates); this is
+ * checked with assert. */
+void FsmAp::orFsm( Key *set, int len )
+{
+    /* Start state first, accepting state second. */
+    StateAp *entry = addState();
+    setStartState( entry );
+
+    StateAp *accept = addState();
+    setFinState( accept );
+
+    /* Verify the ordering of every adjacent pair before drawing anything. */
+    for ( int k = 0; k+1 < len; k++ )
+        assert( set[k] < set[k+1] );
+
+    /* One transition per key in the set. */
+    int pos = 0;
+    while ( pos < len ) {
+        attachNewTrans( startState, accept, set[pos], set[pos] );
+        pos += 1;
+    }
+}
+
+/* Build the machine that matches any single key in the inclusive range
+ * low..high: two states joined by one range transition. The caller must
+ * ensure low <= high; nothing here checks it. */
+void FsmAp::rangeFsm( Key low, Key high )
+{
+    /* Start state first, accepting state second. */
+    StateAp *entry = addState();
+    setStartState( entry );
+
+    StateAp *accept = addState();
+    setFinState( accept );
+
+    /* A single transition carries the whole range. */
+    attachNewTrans( startState, accept, low, high );
+}
+
+/* Build the machine that matches any number of keys from the inclusive range
+ * low..high: one state that is both start and final, looping to itself on
+ * the range. */
+void FsmAp::rangeStarFsm( Key low, Key high)
+{
+    /* The only state is the start state and also accepts. */
+    StateAp *only = addState();
+    setStartState( only );
+    setFinState( startState );
+
+    /* Self loop on the range. */
+    attachNewTrans( startState, startState, low, high );
+}
+
+/* Build the machine that matches the empty string: a single state without
+ * transitions that is both the start state and a final state. */
+void FsmAp::lambdaFsm( )
+{
+    StateAp *only = addState();
+    setStartState( only );
+
+    /* Accepting at the start is what admits the empty string. */
+    setFinState( startState );
+}
+
+/* Build the machine that matches nothing at all: a single state without
+ * transitions that is the start state and is NOT made final. */
+void FsmAp::emptyFsm( )
+{
+    /* One lone state serving as the start state only. */
+    StateAp *only = addState();
+    setStartState( only );
+}
+
+/* Apply srcState's out action table and out priority table to every out
+ * transition of destState that has a target state. Transitions without a
+ * target are left untouched. */
+void FsmAp::transferOutData( StateAp *destState, StateAp *srcState )
+{
+    for ( TransList::Iter tr = destState->outList; tr.lte(); tr++ ) {
+        /* Skip transitions that go nowhere. */
+        if ( tr->toState == 0 )
+            continue;
+
+        /* Actions first, then priorities, both taken from the source. */
+        tr->actionTable.setActions( srcState->outActionTable );
+        tr->priorTable.setPriors( srcState->outPriorTable );
+    }
+}
+
+/* Kleene star operator. Makes this machine the kleene star of itself. The
+ * start state is merged into the final states with mergeStatesLeaving, so
+ * the transitions that go out of the machine and back into itself are
+ * treated as leaving transitions. */
+void FsmAp::starOp( )
+{
+ /* For the merging process. */
+ MergeData md;
+
+ /* Turn on misfit accounting to possibly catch the old start state. */
+ setMisfitAccounting( true );
+
+ /* Create the new start state. It will be set final after the merging
+ * of the final states with the start state is complete. */
+ StateAp *prevStartState = startState;
+ unsetStartState();
+ setStartState( addState() );
+
+ /* Merge the new start state with the old one to isolate it. */
+ mergeStates( md, startState, prevStartState );
+
+ /* Merge the start state into all final states. Except the start state on
+ * the first pass. If the start state is set final we will be doubling up
+ * its transitions, which will get transferred to any final states that
+ * follow it in the final state set. This will be determined by the order
+ * of items in the final state set. To prevent this we just merge with the
+ * start on a second pass. */
+ for ( StateSet::Iter st = finStateSet; st.lte(); st++ ) {
+ if ( *st != startState )
+ mergeStatesLeaving( md, *st, startState );
+ }
+
+ /* Now it is safe to merge the start state with itself (provided it
+ * is set final). */
+ if ( startState->isFinState() )
+ mergeStatesLeaving( md, startState, startState );
+
+ /* Now ensure the new start state is a final state. */
+ setFinState( startState );
+
+ /* Fill in any states that were newed up as combinations of others. */
+ fillInStates( md );
+
+ /* Remove the misfits and turn off misfit accounting. */
+ removeMisfits();
+ setMisfitAccounting( false );
+}
+
+/* Turn this machine into `times` consecutive copies of itself. times must be
+ * at least one; zero would produce the null machine and require deleting
+ * this, which is not supported here. */
+void FsmAp::repeatOp( int times )
+{
+    assert( times > 0 );
+
+    /* One repetition is the machine exactly as it stands. */
+    if ( times == 1 )
+        return;
+
+    /* Pristine copy, taken before anything is concatenated onto this. */
+    FsmAp *pristine = new FsmAp( *this );
+
+    /* Concatenate fresh duplicates for all but the final repetition. */
+    int made = 1;
+    while ( made < times-1 ) {
+        FsmAp *clone = new FsmAp( *pristine );
+        doConcat( clone, 0, false );
+        made += 1;
+    }
+
+    /* The pristine copy itself supplies the last repetition. */
+    doConcat( pristine, 0, false );
+}
+
+/* Optional repetition: concatenates copies of the original machine in the
+ * optional style (former final states stay final) and makes the start state
+ * final so that zero copies are allowed too. times must be at least one. */
+void FsmAp::optionalRepeatOp( int times )
+{
+    /* Must be 1 and up. 0 produces null machine and requires deleting this. */
+    assert( times > 0 );
+
+    /* A repeat of one optional merely allows zero string. */
+    if ( times == 1 ) {
+        setFinState( startState );
+        return;
+    }
+
+    /* Make a machine to make copies from. */
+    FsmAp *copyFrom = new FsmAp( *this );
+
+    /* The state set used in the from end of the concatenation. Starts with
+     * the initial final state set, then after each concatenation, gets set
+     * to the final states that come from the duplicate. */
+    StateSet lastFinSet( finStateSet );
+
+    /* Set the start state final to allow zero copies. */
+    setFinState( startState );
+
+    /* Concatenate duplicates onto the end up until before the last. */
+    for ( int i = 1; i < times-1; i++ ) {
+        /* Make a duplicate for concatenating and set the fin bits to graph 2
+         * so we can pick out its final states after the optional style
+         * concat. */
+        FsmAp *dup = new FsmAp( *copyFrom );
+        dup->setFinBits( SB_GRAPH2 );
+        doConcat( dup, &lastFinSet, true );
+
+        /* Clear the last final state set and make the new one by taking only
+         * the final states that come from graph 2. The index has its own
+         * name so that it no longer hides the outer loop's i. */
+        lastFinSet.empty();
+        for ( int f = 0; f < finStateSet.length(); f++ ) {
+            /* If the state came from graph 2, add it to the last set and
+             * clear the bits. */
+            StateAp *fs = finStateSet[f];
+            if ( fs->stateBits & SB_GRAPH2 ) {
+                lastFinSet.insert( fs );
+                fs->stateBits &= ~SB_GRAPH2;
+            }
+        }
+    }
+
+    /* Now use the copyFrom on the end, no bits set, no bits to clear. */
+    doConcat( copyFrom, &lastFinSet, true );
+}
+
+
+/* Fsm concatenation worker. Appends other to this machine and deletes other.
+ *
+ * other      - machine to concatenate; its states are moved into this
+ *              machine and the object is deleted.
+ * fromStates - states to merge other's start state into. When null, a copy
+ *              of this machine's current final state set is used.
+ * optional   - when true this machine's final states remain final, which
+ *              makes the concatenated part optional. */
+void FsmAp::doConcat( FsmAp *other, StateSet *fromStates, bool optional )
+{
+    /* For the merging process. */
+    StateSet finStateSetCopy;
+    MergeData md;
+
+    /* Turn on misfit accounting for both graphs. */
+    setMisfitAccounting( true );
+    other->setMisfitAccounting( true );
+
+    /* Get the other's start state. */
+    StateAp *otherStartState = other->startState;
+
+    /* Unset other's start state before bringing in the entry points. */
+    other->unsetStartState();
+
+    /* Bring in the rest of other's entry points. */
+    copyInEntryPoints( other );
+    other->entryPoints.empty();
+
+    /* Bring in other's states into our state lists. */
+    stateList.append( other->stateList );
+    misfitList.append( other->misfitList );
+
+    /* If from states is not set, then get a copy of our final state set
+     * before we clobber it and use it instead. */
+    if ( fromStates == 0 ) {
+        finStateSetCopy = finStateSet;
+        fromStates = &finStateSetCopy;
+    }
+
+    /* Unset all of our final states (unless the concatenation is optional)
+     * and get the final states from other. */
+    if ( !optional )
+        unsetAllFinStates();
+    finStateSet.insert( other->finStateSet );
+
+    /* Since other's lists are empty, we can delete the fsm without
+     * affecting any states. */
+    delete other;
+
+    /* Merge our former final states with the start state of other. */
+    for ( int i = 0; i < fromStates->length(); i++ ) {
+        StateAp *state = fromStates->data[i];
+
+        /* Merge the former final state with other's start state. */
+        mergeStatesLeaving( md, state, otherStartState );
+
+        /* If the former final state was not reset final then we must clear
+         * the state's out trans data. If it got reset final then it gets to
+         * keep its out trans data. This must be done before fillInStates
+         * gets called to prevent the data from being sourced. */
+        if ( ! state->isFinState() )
+            clearOutData( state );
+    }
+
+    /* Fill in any new states made from merging. */
+    fillInStates( md );
+
+    /* Remove the misfits and turn off misfit accounting. */
+    removeMisfits();
+    setMisfitAccounting( false );
+}
+
+/* Concatenates other to the end of this machine. Other is deleted. The work
+ * is done by doConcat, which merges with mergeStatesLeaving so that the
+ * transitions leaving this machine and entering other are treated as leaving
+ * transitions. */
+void FsmAp::concatOp( FsmAp *other )
+{
+ /* Plain (non-optional) concatenation; a null from-set makes doConcat use
+ * our current final states. */
+ doConcat( other, 0, false );
+}
+
+
+/* Worker shared by the union, intersection and subtraction operators. Moves
+ * all of other's states, entry points and final states into this machine,
+ * deletes other, then creates a new start state into which both original
+ * start states are merged. Misfit accounting is left to the caller. */
+void FsmAp::doOr( FsmAp *other )
+{
+ /* For the merging process. */
+ MergeData md;
+
+ /* Build a state set consisting of both start states */
+ StateSet startStateSet;
+ startStateSet.insert( startState );
+ startStateSet.insert( other->startState );
+
+ /* Both of the original start states lose their start state status. */
+ unsetStartState();
+ other->unsetStartState();
+
+ /* Bring in the rest of other's entry points. */
+ copyInEntryPoints( other );
+ other->entryPoints.empty();
+
+ /* Merge the lists. This will move all the states from other
+ * into this. No states will be deleted. */
+ stateList.append( other->stateList );
+ misfitList.append( other->misfitList );
+
+ /* Move the final set data from other into this. */
+ finStateSet.insert(other->finStateSet);
+ other->finStateSet.empty();
+
+ /* Since other's list is empty, we can delete the fsm without
+ * affecting any states. */
+ delete other;
+
+ /* Create a new start state. */
+ setStartState( addState() );
+
+ /* Merge the start states. */
+ mergeStates( md, startState, startStateSet.data, startStateSet.length() );
+
+ /* Fill in any new states made from merging. */
+ fillInStates( md );
+}
+
+/* Unions other with this machine. Other is deleted. This is doOr wrapped in
+ * misfit accounting, with no final-state filtering afterwards. */
+void FsmAp::unionOp( FsmAp *other )
+{
+ /* Turn on misfit accounting for both graphs. */
+ setMisfitAccounting( true );
+ other->setMisfitAccounting( true );
+
+ /* Call Worker routine. */
+ doOr( other );
+
+ /* Remove the misfits and turn off misfit accounting. */
+ removeMisfits();
+ setMisfitAccounting( false );
+}
+
+/* Intersects other with this machine. Other is deleted. Final states of the
+ * two machines are tagged with SB_GRAPH1 and SB_GRAPH2 respectively before
+ * the machines are or'ed; afterwards unsetIncompleteFinals drops final
+ * states that carry only one of the two tags. */
+void FsmAp::intersectOp( FsmAp *other )
+{
+ /* Turn on misfit accounting for both graphs. */
+ setMisfitAccounting( true );
+ other->setMisfitAccounting( true );
+
+ /* Set the fin bits on this and other to want each other. */
+ setFinBits( SB_GRAPH1 );
+ other->setFinBits( SB_GRAPH2 );
+
+ /* Call worker Or routine. */
+ doOr( other );
+
+ /* Unset any final states that are no longer to
+ * be final due to final bits. */
+ unsetIncompleteFinals();
+
+ /* Remove the misfits and turn off misfit accounting. */
+ removeMisfits();
+ setMisfitAccounting( false );
+
+ /* Remove states that have no path to a final state. */
+ removeDeadEndStates();
+}
+
+/* Set subtracts other machine from this machine. Other is deleted. Only
+ * other's final states are tagged with SB_GRAPH1 before the machines are
+ * or'ed; afterwards unsetKilledFinals drops every final state carrying that
+ * tag. */
+void FsmAp::subtractOp( FsmAp *other )
+{
+ /* Turn on misfit accounting for both graphs. */
+ setMisfitAccounting( true );
+ other->setMisfitAccounting( true );
+
+ /* Set the fin bits of other to be killers. */
+ other->setFinBits( SB_GRAPH1 );
+
+ /* Call worker Or routine. */
+ doOr( other );
+
+ /* Unset any final states that are no longer to
+ * be final due to final bits. */
+ unsetKilledFinals();
+
+ /* Remove the misfits and turn off misfit accounting. */
+ removeMisfits();
+ setMisfitAccounting( false );
+
+ /* Remove states that have no path to a final state. */
+ removeDeadEndStates();
+}
+
+/* Report whether state is the target of any element of eptVect. A null
+ * vector contains nothing. */
+bool FsmAp::inEptVect( EptVect *eptVect, StateAp *state )
+{
+    /* No vector, no match. */
+    if ( eptVect == 0 )
+        return false;
+
+    /* Scan the elements in order, stopping at the first hit. */
+    for ( EptVect::Iter ept = *eptVect; ept.lte(); ept++ ) {
+        if ( ept->targ == state )
+            return true;
+    }
+    return false;
+}
+
+/* Fill the epsilon vector of a root state from a given starting point.
+ * Employs a depth first search through the graph of epsilon transitions.
+ *
+ * root          - state whose eptVect is being filled (created on demand).
+ * from          - state whose epsilon transitions are walked at this level.
+ * parentLeaving - true when an earlier step of the search was already marked
+ *                 as leaving; it is inherited by every deeper target. */
+void FsmAp::epsilonFillEptVectFrom( StateAp *root, StateAp *from, bool parentLeaving )
+{
+ /* Walk the epsilon transitions out of the state. */
+ for ( EpsilonTrans::Iter ep = from->epsilonTrans; ep.lte(); ep++ ) {
+ /* Find the entry point, if it does not resolve, ignore it. */
+ EntryMapEl *enLow, *enHigh;
+ if ( entryPoints.findMulti( *ep, enLow, enHigh ) ) {
+ /* Loop the targets. */
+ for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) {
+ /* Do not add the state we are walking from or states already in
+ * eptVect. NOTE(review): the original comment said "the root",
+ * but the test below compares against from, not root -- confirm
+ * which one is intended. */
+ StateAp *targ = en->value;
+ if ( targ != from && !inEptVect(root->eptVect, targ) ) {
+ /* Maybe need to create the eptVect. */
+ if ( root->eptVect == 0 )
+ root->eptVect = new EptVect();
+
+ /* If moving to a different graph or if any parent is
+ * leaving then we are leaving. */
+ bool leaving = parentLeaving ||
+ root->owningGraph != targ->owningGraph;
+
+ /* All ok, add the target epsilon and recurse. */
+ root->eptVect->append( EptVectEl(targ, leaving) );
+ epsilonFillEptVectFrom( root, targ, leaving );
+ }
+ }
+ }
+ }
+}
+
+/* For every epsilon target that itself has an epsilon vector (so it would be
+ * both read from and written to while the epsilons are resolved), create a
+ * shadow state holding a merged copy of it and redirect the epsilon vector
+ * entries to that shadow. Each such target gets at most one shadow, recorded
+ * in its isolatedShadow field. */
+void FsmAp::shadowReadWriteStates( MergeData &md )
+{
+ /* Init isolatedShadow algorithm data. */
+ for ( StateList::Iter st = stateList; st.lte(); st++ )
+ st->isolatedShadow = 0;
+
+ /* Any states that may be both read from and written to must
+ * be shadowed. */
+ for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+ /* Find such states by looping through the eptVect lists, which give us
+ * the states that will be read from. May cause us to visit the states
+ * that we are interested in more than once. */
+ if ( st->eptVect != 0 ) {
+ /* For all states that will be read from. */
+ for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) {
+ /* Check for read and write to the same state. */
+ StateAp *targ = ept->targ;
+ if ( targ->eptVect != 0 ) {
+ /* State is to be written to, if the shadow is not already
+ * there, create it. */
+ if ( targ->isolatedShadow == 0 ) {
+ StateAp *shadow = addState();
+ mergeStates( md, shadow, targ );
+ targ->isolatedShadow = shadow;
+ }
+
+ /* Write shadow into the state vector so that it is the
+ * state that the epsilon transition will read from. */
+ ept->targ = targ->isolatedShadow;
+ }
+ }
+ }
+ }
+}
+
+/* Resolve all epsilon transitions in the machine. First computes, for each
+ * state, the vector of states reachable over epsilon transitions, then
+ * shadows states that are both read and written, and finally merges every
+ * epsilon target into its source state. The epsilon vectors and the epsilon
+ * transition lists are cleared along the way. */
+void FsmAp::resolveEpsilonTrans( MergeData &md )
+{
+ /* Walk the state list and invoke recursive worker on each state. */
+ for ( StateList::Iter st = stateList; st.lte(); st++ )
+ epsilonFillEptVectFrom( st, st, false );
+
+ /* Prevent reading from and writing to of the same state. */
+ shadowReadWriteStates( md );
+
+ /* For all states that have epsilon transitions out, draw the transitions,
+ * clear the epsilon transitions. */
+ for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+ /* If there is a state vector, then create the pre-merge state. */
+ if ( st->eptVect != 0 ) {
+ /* Merge all the epsilon targets into the state. Targets marked as
+ * leaving use the leaving flavour of the merge. */
+ for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) {
+ if ( ept->leaving )
+ mergeStatesLeaving( md, st, ept->targ );
+ else
+ mergeStates( md, st, ept->targ );
+ }
+
+ /* Clean up the target list. */
+ delete st->eptVect;
+ st->eptVect = 0;
+ }
+
+ /* Clear the epsilon transitions vector. */
+ st->epsilonTrans.empty();
+ }
+}
+
+/* Resolve the epsilon transitions of this single machine. Every state is
+ * assigned owning graph 0 first, so no epsilon target is seen as belonging
+ * to a different graph when the leaving flag is computed. */
+void FsmAp::epsilonOp()
+{
+ /* For merging process. */
+ MergeData md;
+
+ setMisfitAccounting( true );
+
+ /* Put all states into the same owning graph. */
+ for ( StateList::Iter st = stateList; st.lte(); st++ )
+ st->owningGraph = 0;
+
+ /* Perform merges. */
+ resolveEpsilonTrans( md );
+
+ /* Epsilons can cause merges which leave behind unreachable states. */
+ fillInStates( md );
+
+ /* Remove the misfits and turn off misfit accounting. */
+ removeMisfits();
+ setMisfitAccounting( false );
+}
+
+/* Make a new machine by joining together a bunch of machines without making
+ * any transitions between them, other than those that come from resolving
+ * epsilon transitions.
+ *
+ * startId   - entry id whose entry points are merged into the new start
+ *             state; if it names no entry point a bare start state is used.
+ * finalId   - entry id given to the implicit final state. A negative
+ *             finalId results in there being no final state.
+ * others    - the other machines; each one is deleted.
+ * numOthers - number of elements in others. */
+void FsmAp::joinOp( int startId, int finalId, FsmAp **others, int numOthers )
+{
+ /* For the merging process. */
+ MergeData md;
+
+ /* Set the owning machines. Start at one. Zero is reserved for the start
+ * and final states. */
+ for ( StateList::Iter st = stateList; st.lte(); st++ )
+ st->owningGraph = 1;
+ for ( int m = 0; m < numOthers; m++ ) {
+ for ( StateList::Iter st = others[m]->stateList; st.lte(); st++ )
+ st->owningGraph = 2+m;
+ }
+
+ /* All machines lose start state status. */
+ unsetStartState();
+ for ( int m = 0; m < numOthers; m++ )
+ others[m]->unsetStartState();
+
+ /* Bring the other machines into this. */
+ for ( int m = 0; m < numOthers; m++ ) {
+ /* Bring in the rest of other's entry points. */
+ copyInEntryPoints( others[m] );
+ others[m]->entryPoints.empty();
+
+ /* Merge the lists. This will move all the states from other into
+ * this. No states will be deleted. */
+ stateList.append( others[m]->stateList );
+ assert( others[m]->misfitList.length() == 0 );
+
+ /* Move the final set data from other into this. */
+ finStateSet.insert( others[m]->finStateSet );
+ others[m]->finStateSet.empty();
+
+ /* Since other's list is empty, we can delete the fsm without
+ * affecting any states. */
+ delete others[m];
+ }
+
+ /* Look up the start entry point. */
+ EntryMapEl *enLow = 0, *enHigh = 0;
+ bool findRes = entryPoints.findMulti( startId, enLow, enHigh );
+ if ( ! findRes ) {
+ /* No start state. Set a default one and proceed with the join. Note
+ * that the result of the join will be a very uninteresting machine. */
+ setStartState( addState() );
+ }
+ else {
+ /* There is at least one start state, create a state that will become
+ * the new start state. */
+ StateAp *newStart = addState();
+ setStartState( newStart );
+
+ /* The start state is in an owning machine class all its own. */
+ newStart->owningGraph = 0;
+
+ /* Create the set of states to merge from. */
+ StateSet stateSet;
+ for ( EntryMapEl *en = enLow; en <= enHigh; en++ )
+ stateSet.insert( en->value );
+
+ /* Merge in the set of start states into the new start state. */
+ mergeStates( md, newStart, stateSet.data, stateSet.length() );
+ }
+
+ /* Take a copy of the final state set, before unsetting them all. This
+ * will allow us to call clearOutData on the states that don't get
+ * final state status back. */
+ StateSet finStateSetCopy = finStateSet;
+
+ /* Now all final states are unset. */
+ unsetAllFinStates();
+
+ if ( finalId >= 0 ) {
+ /* Create the implicit final state. */
+ StateAp *finState = addState();
+ setFinState( finState );
+
+ /* Assign an entry into the final state on the final state entry id. Note
+ * that there may already be an entry on this id. That's ok. Also set the
+ * final state owning machine id. It's in a class all its own. */
+ setEntry( finalId, finState );
+ finState->owningGraph = 0;
+ }
+
+ /* Hand over to workers for resolving epsilon trans. This will merge states
+ * with the targets of their epsilon transitions. */
+ resolveEpsilonTrans( md );
+
+ /* Clear the out data of any states that did not get final state status
+ * back. */
+ for ( StateSet::Iter st = finStateSetCopy; st.lte(); st++ ) {
+ if ( !((*st)->stateBits & SB_ISFINAL) )
+ clearOutData( *st );
+ }
+
+ /* Fill in any new states made from merging. */
+ fillInStates( md );
+
+ /* Joining can be messy. Instead of having misfit accounting on (which is
+ * tricky here) do a full cleaning. */
+ removeUnreachableStates();
+}
+
+/* Absorb the given machines into this one without connecting anything: their
+ * entry points, states and final states are moved over and the emptied
+ * machines are deleted. This machine's own start state is left alone, while
+ * each of the others has its start state unset first. */
+void FsmAp::globOp( FsmAp **others, int numOthers )
+{
+    /* First pass: every other machine gives up its start state. */
+    for ( int m = 0; m < numOthers; m++ )
+        others[m]->unsetStartState();
+
+    /* Second pass: move the contents over, one machine at a time. */
+    for ( int m = 0; m < numOthers; m++ ) {
+        FsmAp *other = others[m];
+
+        /* Entry points come first. */
+        copyInEntryPoints( other );
+        other->entryPoints.empty();
+
+        /* Then the states; nothing is deleted by the move. The other
+         * machine is expected to have no misfits. */
+        stateList.append( other->stateList );
+        assert( other->misfitList.length() == 0 );
+
+        /* Then the final state set. */
+        finStateSet.insert( other->finStateSet );
+        other->finStateSet.empty();
+
+        /* The shell is now empty and can go without touching any state. */
+        delete other;
+    }
+}
+
+/* Make every entry id refer to exactly one state. Ids that already map to a
+ * single state are re-registered unchanged; for an id that maps to several
+ * states a new state is created, all of those states are merged into it and
+ * it becomes the only entry point for the id. */
+void FsmAp::deterministicEntry()
+{
+    /* For the merging process. */
+    MergeData md;
+
+    /* States may lose their entry points, so track misfits. */
+    setMisfitAccounting( true );
+
+    /* Work from a snapshot of the entry map; the live map is rebuilt from
+     * scratch as the snapshot is scanned. */
+    EntryMap prevEntry = entryPoints;
+    unsetAllEntryPoints();
+
+    int groupStart = 0;
+    while ( groupStart < prevEntry.length() ) {
+        /* Find the end of the run of entries that share this key. */
+        int groupEnd = groupStart;
+        while ( groupEnd < prevEntry.length() &&
+                prevEntry[groupStart].key == prevEntry[groupEnd].key )
+        {
+            groupEnd += 1;
+        }
+
+        if ( groupEnd - groupStart == 1 ) {
+            /* A lone entry point is simply set again. */
+            setEntry( prevEntry[groupStart].key, prevEntry[groupStart].value );
+        }
+        else {
+            /* Several targets: fold them all into one new state. */
+            StateAp *combined = addState();
+            for ( int en = groupStart; en < groupEnd; en++ )
+                mergeStates( md, combined, prevEntry[en].value );
+
+            /* The new state is the single entry point for the key. */
+            setEntry( prevEntry[groupStart].key, combined );
+        }
+
+        groupStart = groupEnd;
+    }
+
+    /* Old entry states may now be unreachable. Remove the misfits and turn
+     * off misfit accounting. */
+    removeMisfits();
+    setMisfitAccounting( false );
+}
+
+/* Drop final status from every final state that carries the SB_GRAPH1
+ * killing bit, and clear that bit on all states that were final on entry. */
+void FsmAp::unsetKilledFinals()
+{
+    /* Iterate over a snapshot, since the live set shrinks as we go. */
+    StateSet snapshot( finStateSet );
+
+    for ( StateSet::Iter it = snapshot; it.lte(); it++ ) {
+        StateAp *cur = *it;
+
+        /* A state tagged as killer loses its final status. */
+        if ( cur->stateBits & SB_GRAPH1 )
+            unsetFinState( cur );
+
+        /* Clear the killing bit either way. Non final states should never
+         * have had it set in the first place. */
+        cur->stateBits &= ~SB_GRAPH1;
+    }
+}
+
+/* Drop final status from every final state that carries some but not all of
+ * the SB_BOTH bits, and clear those bits on all states that were final on
+ * entry. */
+void FsmAp::unsetIncompleteFinals()
+{
+    /* Iterate over a snapshot, since the live set shrinks as we go. */
+    StateSet snapshot( finStateSet );
+
+    for ( StateSet::Iter it = snapshot; it.lte(); it++ ) {
+        StateAp *cur = *it;
+
+        /* At least one wanting bit is set, but not the complete pair: one
+         * side wants the other and it is not there. */
+        if ( ( cur->stateBits & SB_BOTH ) != 0 &&
+                ( cur->stateBits & SB_BOTH ) != SB_BOTH )
+        {
+            unsetFinState( cur );
+        }
+
+        /* Clear the wanting bits either way. Non final states should never
+         * have had them set in the first place. */
+        cur->stateBits &= ~SB_BOTH;
+    }
+}
+
+/* Ensure that the start state is free of entry points (aside from the fact
+ * that it is the start state). If the start state has entry points then make
+ * a new start state by merging with the old one. Useful before modifying
+ * start transitions. If the existing start state has any entry points other
+ * than the start state entry then modifying its transitions changes more
+ * than the start transitions. So isolate the start state by separating it
+ * out such that it only has start stateness as its entry point. */
+void FsmAp::isolateStartState( )
+{
+ /* For the merging process. */
+ MergeData md;
+
+ /* Bail out if the start state is already isolated. */
+ if ( isStartStateIsolated() )
+ return;
+
+ /* Turn on misfit accounting to possibly catch the old start state. */
+ setMisfitAccounting( true );
+
+ /* This will be the new start state. The existing start
+ * state is merged with it. */
+ StateAp *prevStartState = startState;
+ unsetStartState();
+ setStartState( addState() );
+
+ /* Merge the new start state with the old one to isolate it. */
+ mergeStates( md, startState, prevStartState );
+
+ /* The fill list and stateDict will be empty because the merging of the
+ * old start state into the new one will not have any conflicting
+ * transitions. That is why fillInStates is not called here. */
+ assert( md.stateDict.treeSize == 0 );
+ assert( md.stfillHead == 0 );
+
+ /* The old start state may be unreachable. Remove the misfits and turn off
+ * misfit accounting. */
+ removeMisfits();
+ setMisfitAccounting( false );
+}
+
+/* Debug helpers, compiled only when LOG_CONDS is defined. */
+#ifdef LOG_CONDS
+/* Print the action names of a condition space to cerr as a comma separated
+ * list, walking the condition set from its last element to its first. A null
+ * condition space is printed as "<empty>". */
+void logCondSpace( CondSpace *condSpace )
+{
+ if ( condSpace == 0 )
+ cerr << "<empty>";
+ else {
+ for ( CondSet::Iter csi = condSpace->condSet.last(); csi.gtb(); csi-- ) {
+ /* Separator before every item except the one printed first. */
+ if ( ! csi.last() )
+ cerr << ',';
+ (*csi)->actionName( cerr );
+ }
+ }
+}
+
+/* Dump a freshly created expansion to cerr: its key range, the from
+ * condition space and value, and the to condition space with its list of
+ * values. */
+void logNewExpansion( Expansion *exp )
+{
+ cerr << "created expansion:" << endl;
+ cerr << " range: " << exp->lowKey.getVal() << " .. " <<
+ exp->highKey.getVal() << endl;
+
+ cerr << " fromCondSpace: ";
+ logCondSpace( exp->fromCondSpace );
+ cerr << endl;
+ cerr << " fromVals: " << exp->fromVals << endl;
+
+ cerr << " toCondSpace: ";
+ logCondSpace( exp->toCondSpace );
+ cerr << endl;
+ cerr << " toValsList: ";
+ for ( LongVect::Iter to = exp->toValsList; to.lte(); to++ )
+ cerr << " " << *to;
+ cerr << endl;
+}
+#endif
+
+
+/* Find the plain transitions of destState whose key ranges overlap a
+ * condition range of srcState. For every overlap an expansion is appended to
+ * expansionList. It carries a copy of the transition, has no source
+ * condition space, and targets the source's condition space with every
+ * possible combination of condition values. */
+void FsmAp::findTransExpansions( ExpansionList &expansionList,
+        StateAp *destState, StateAp *srcState )
+{
+    PairIter<TransAp, StateCond> transCond( destState->outList.head,
+            srcState->stateCondList.head );
+    for ( ; !transCond.end(); transCond++ ) {
+        if ( transCond.userState == RangeOverlap ) {
+            Expansion *expansion = new Expansion( transCond.s1Tel.lowKey,
+                    transCond.s1Tel.highKey );
+
+            /* Detached copy of the transition; it keeps the target but has
+             * no from state. */
+            expansion->fromTrans = new TransAp(*transCond.s1Tel.trans);
+            expansion->fromTrans->fromState = 0;
+            expansion->fromTrans->toState = transCond.s1Tel.trans->toState;
+            expansion->fromCondSpace = 0;
+            expansion->fromVals = 0;
+            CondSpace *srcCS = transCond.s2Tel.trans->condSpace;
+            expansion->toCondSpace = srcCS;
+
+            /* One value per subset of the condition set. The shift is done
+             * in long so that the count is computed at the width of the
+             * variable that holds it, not in int. */
+            long numTargVals = 1L << srcCS->condSet.length();
+            for ( long targVals = 0; targVals < numTargVals; targVals++ )
+                expansion->toValsList.append( targVals );
+
+            #ifdef LOG_CONDS
+            logNewExpansion( expansion );
+            #endif
+            expansionList.append( expansion );
+        }
+    }
+}
+
+/* Search state's out transitions for ranges that overlap lowKey..highKey
+ * once that range is offset by fromCondSpace's base key plus fromVals times
+ * the alphabet size. For every overlap an expansion from (fromCondSpace,
+ * fromVals) to (toCondSpace, toValsList) is appended to expansionList,
+ * carrying a detached copy of the overlapping transition. */
+void FsmAp::findCondExpInTrans( ExpansionList &expansionList, StateAp *state,
+        Key lowKey, Key highKey, CondSpace *fromCondSpace, CondSpace *toCondSpace,
+        long fromVals, LongVect &toValsList )
+{
+    /* Stand-alone probe transition covering the offset key range. */
+    TransAp probe;
+    probe.next = 0;
+    probe.prev = 0;
+    probe.lowKey = fromCondSpace->baseKey + fromVals * keyOps->alphSize() +
+            (lowKey - keyOps->minKey);
+    probe.highKey = fromCondSpace->baseKey + fromVals * keyOps->alphSize() +
+            (highKey - keyOps->minKey);
+
+    for ( PairIter<TransAp> overlap( state->outList.head, &probe );
+            !overlap.end(); overlap++ )
+    {
+        /* Only genuine overlaps produce an expansion. */
+        if ( overlap.userState != RangeOverlap )
+            continue;
+
+        Expansion *found = new Expansion( lowKey, highKey );
+
+        /* Copy of the transition that keeps its target but has no from
+         * state. */
+        found->fromTrans = new TransAp(*overlap.s1Tel.trans);
+        found->fromTrans->fromState = 0;
+        found->fromTrans->toState = overlap.s1Tel.trans->toState;
+
+        /* Where the expansion comes from and where it goes. */
+        found->fromCondSpace = fromCondSpace;
+        found->fromVals = fromVals;
+        found->toCondSpace = toCondSpace;
+        found->toValsList = toValsList;
+
+        expansionList.append( found );
+        #ifdef LOG_CONDS
+        logNewExpansion( found );
+        #endif
+    }
+}
+
+/* Find the condition ranges of destState that overlap condition ranges of
+ * srcState where the source tests conditions the destination does not. For
+ * each such overlap, and for each combination of the destination's condition
+ * values, the matching values in the merged condition space are computed and
+ * findCondExpInTrans is asked to queue the expansions on expansionList. */
+void FsmAp::findCondExpansions( ExpansionList &expansionList,
+        StateAp *destState, StateAp *srcState )
+{
+    PairIter<StateCond, StateCond> condCond( destState->stateCondList.head,
+            srcState->stateCondList.head );
+    for ( ; !condCond.end(); condCond++ ) {
+        if ( condCond.userState == RangeOverlap ) {
+            /* Loop over all existing condVals. */
+            CondSet &destCS = condCond.s1Tel.trans->condSpace->condSet;
+            long destLen = destCS.length();
+
+            /* Find the items in src cond set that are not in dest
+             * cond set. These are the items that we must expand. */
+            CondSet srcOnlyCS = condCond.s2Tel.trans->condSpace->condSet;
+            for ( CondSet::Iter dcsi = destCS; dcsi.lte(); dcsi++ )
+                srcOnlyCS.remove( *dcsi );
+            long srcOnlyLen = srcOnlyCS.length();
+
+            if ( srcOnlyLen > 0 ) {
+                #ifdef LOG_CONDS
+                cerr << "there are " << srcOnlyCS.length() << " item(s) that are "
+                        "only in the srcCS" << endl;
+                #endif
+
+                CondSet mergedCS = destCS;
+                mergedCS.insert( condCond.s2Tel.trans->condSpace->condSet );
+
+                CondSpace *fromCondSpace = addCondSpace( destCS );
+                CondSpace *toCondSpace = addCondSpace( mergedCS );
+
+                /* Loop all values in the dest space. Every shift below is
+                 * done in long, the type of the values being built, rather
+                 * than in int. */
+                for ( long destVals = 0; destVals < (1L << destLen); destVals++ ) {
+                    /* Translate the dest bits to their positions in the
+                     * merged set. */
+                    long basicVals = 0;
+                    for ( CondSet::Iter csi = destCS; csi.lte(); csi++ ) {
+                        if ( destVals & (1L << csi.pos()) ) {
+                            Action **cim = mergedCS.find( *csi );
+                            long bitPos = (cim - mergedCS.data);
+                            basicVals |= 1L << bitPos;
+                        }
+                    }
+
+                    /* Loop all new values: each combination of the
+                     * source-only conditions on top of the basic bits. */
+                    LongVect expandToVals;
+                    for ( long soVals = 0; soVals < (1L << srcOnlyLen); soVals++ ) {
+                        long targVals = basicVals;
+                        for ( CondSet::Iter csi = srcOnlyCS; csi.lte(); csi++ ) {
+                            if ( soVals & (1L << csi.pos()) ) {
+                                Action **cim = mergedCS.find( *csi );
+                                long bitPos = (cim - mergedCS.data);
+                                targVals |= 1L << bitPos;
+                            }
+                        }
+                        expandToVals.append( targVals );
+                    }
+
+                    findCondExpInTrans( expansionList, destState,
+                            condCond.s1Tel.lowKey, condCond.s1Tel.highKey,
+                            fromCondSpace, toCondSpace, destVals, expandToVals );
+                }
+            }
+        }
+    }
+}
+
+/* Apply a list of expansions to destState. For every target value of every
+ * expansion, the expansion's private transition copy is re-keyed into the
+ * target condition space and copied into destState via outTransCopy(). */
+void FsmAp::doExpand( MergeData &md, StateAp *destState, ExpansionList &expList1 )
+{
+    ExpansionList::Iter exp = expList1;
+    while ( exp.lte() ) {
+        LongVect::Iter to = exp->toValsList;
+        while ( to.lte() ) {
+            const long targVals = *to;
+
+            /* The transition copy made when the expansion was created is
+             * reused for every target value; only its keys change. */
+            TransAp *srcTrans = exp->fromTrans;
+
+            srcTrans->lowKey = exp->toCondSpace->baseKey +
+                    targVals * keyOps->alphSize() + (exp->lowKey - keyOps->minKey);
+            srcTrans->highKey = exp->toCondSpace->baseKey +
+                    targVals * keyOps->alphSize() + (exp->highKey - keyOps->minKey);
+
+            /* Wrap the single transition in a temporary list for the copy,
+             * then abandon the list so the transition is not deleted. */
+            TransList single;
+            single.append( srcTrans );
+            outTransCopy( md, destState, single.head );
+            single.abandon();
+
+            to++;
+        }
+        exp++;
+    }
+}
+
+/* For every expansion in expList1, remove from destState's out list the key
+ * range the expansion was taken from. When the expansion has no source
+ * condition space the range is the plain lowKey/highKey; otherwise it is
+ * translated into the key region of (fromCondSpace, fromVals). Transitions
+ * that overlap the range are detached and deleted; the pieces that lie
+ * outside it are collected and handed back to the out list.
+ */
+void FsmAp::doRemove( MergeData &md, StateAp *destState, ExpansionList &expList1 )
+{
+ for ( ExpansionList::Iter exp = expList1; exp.lte(); exp++ ) {
+ Removal removal;
+ if ( exp->fromCondSpace == 0 ) {
+ removal.lowKey = exp->lowKey;
+ removal.highKey = exp->highKey;
+ }
+ else {
+ removal.lowKey = exp->fromCondSpace->baseKey +
+ exp->fromVals * keyOps->alphSize() + (exp->lowKey - keyOps->minKey);
+ removal.highKey = exp->fromCondSpace->baseKey +
+ exp->fromVals * keyOps->alphSize() + (exp->highKey - keyOps->minKey);
+ }
+ removal.next = 0; /* the removal is a one-element list */
+
+ TransList destList; /* transitions that survive this removal */
+ PairIter<TransAp, Removal> pairIter( destState->outList.head, &removal );
+ for ( ; !pairIter.end(); pairIter++ ) {
+ switch ( pairIter.userState ) {
+ case RangeInS1: {
+ TransAp *destTrans = pairIter.s1Tel.trans;
+ destTrans->lowKey = pairIter.s1Tel.lowKey;
+ destTrans->highKey = pairIter.s1Tel.highKey;
+ destList.append( destTrans );
+ break;
+ }
+ case RangeInS2:
+ break;
+ case RangeOverlap: {
+ TransAp *trans = pairIter.s1Tel.trans;
+ detachTrans( trans->fromState, trans->toState, trans );
+ delete trans;
+ break;
+ }
+ case BreakS1: {
+ pairIter.s1Tel.trans = dupTrans( destState,
+ pairIter.s1Tel.trans ); /* NOTE(review): presumably gives the split-off remainder its own transition; confirm against dupTrans */
+ break;
+ }
+ case BreakS2:
+ break;
+ }
+ }
+ destState->outList.transfer( destList );
+ }
+}
+/* Merge the state-cond list of srcState into that of destState. Ranges only
+ * in dest are kept with the keys reported by the iterator, ranges only in
+ * src are copied, and overlapping ranges keep the dest element but switch it
+ * to the condition space of the union of both cond sets. The result replaces
+ * destState's stateCondList; srcState's list elements are only read.
+ */
+void FsmAp::mergeStateConds( StateAp *destState, StateAp *srcState )
+{
+ StateCondList destList;
+ PairIter<StateCond> pairIter( destState->stateCondList.head,
+ srcState->stateCondList.head );
+ for ( ; !pairIter.end(); pairIter++ ) {
+ switch ( pairIter.userState ) {
+ case RangeInS1: {
+ StateCond *destCond = pairIter.s1Tel.trans;
+ destCond->lowKey = pairIter.s1Tel.lowKey;
+ destCond->highKey = pairIter.s1Tel.highKey;
+ destList.append( destCond );
+ break;
+ }
+ case RangeInS2: {
+ StateCond *newCond = new StateCond( *pairIter.s2Tel.trans ); /* copy, the source element stays in srcState */
+ newCond->lowKey = pairIter.s2Tel.lowKey;
+ newCond->highKey = pairIter.s2Tel.highKey;
+ destList.append( newCond );
+ break;
+ }
+ case RangeOverlap: {
+ StateCond *destCond = pairIter.s1Tel.trans;
+ StateCond *srcCond = pairIter.s2Tel.trans;
+ CondSet mergedCondSet;
+ mergedCondSet.insert( destCond->condSpace->condSet );
+ mergedCondSet.insert( srcCond->condSpace->condSet );
+ destCond->condSpace = addCondSpace( mergedCondSet );
+
+ destCond->lowKey = pairIter.s1Tel.lowKey;
+ destCond->highKey = pairIter.s1Tel.highKey;
+ destList.append( destCond );
+ break;
+ }
+ case BreakS1:
+ pairIter.s1Tel.trans = new StateCond( *pairIter.s1Tel.trans ); /* continue with a fresh copy of the dest element */
+ break;
+
+ case BreakS2:
+ break;
+ }
+ }
+ destState->stateCondList.transfer( destList );
+}
+
+/* Merge srcState into destState as part of drawing in leaving transitions.
+ * When destState carries out data, the source state is first merged into a
+ * fresh scratch state, destState's out data and out conditions are applied to
+ * that scratch state, and the scratch state is merged instead. The scratch
+ * state will be reaped because it is never given any in transitions. */
+void FsmAp::mergeStatesLeaving( MergeData &md, StateAp *destState, StateAp *srcState )
+{
+    if ( hasOutData( destState ) ) {
+        StateAp *scratch = addState();
+        mergeStates( md, scratch, srcState );
+        transferOutData( scratch, destState );
+
+        ActionSet::Iter outCond = destState->outCondSet;
+        while ( outCond.lte() ) {
+            embedCondition( md, scratch, *outCond );
+            outCond++;
+        }
+
+        mergeStates( md, destState, scratch );
+    }
+    else {
+        /* No out data to carry along, a plain merge is enough. */
+        mergeStates( md, destState, srcState );
+    }
+}
+
+/* Merge each of the numSrc states in srcStates into destState, in array
+ * order. */
+void FsmAp::mergeStates( MergeData &md, StateAp *destState,
+        StateAp **srcStates, int numSrc )
+{
+    int remaining = numSrc;
+    StateAp **src = srcStates;
+    while ( remaining > 0 ) {
+        mergeStates( md, destState, *src );
+        src++;
+        remaining--;
+    }
+}
+/* Merge srcState into destState. The condition expansions needed in both
+ * directions are computed first, then the state-cond lists are merged, the
+ * source out transitions are copied in, the expansions are applied and the
+ * ranges they were taken from are removed. Finally the state bits, final
+ * state status, epsilon transitions, priorities and action tables of
+ * srcState are drawn into destState. Merging a state with itself is
+ * supported; in that case the properties are duplicated before being
+ * applied.
+ */
+void FsmAp::mergeStates( MergeData &md, StateAp *destState, StateAp *srcState )
+{
+ ExpansionList expList1;
+ ExpansionList expList2;
+
+ findTransExpansions( expList1, destState, srcState );
+ findCondExpansions( expList1, destState, srcState );
+ findTransExpansions( expList2, srcState, destState );
+ findCondExpansions( expList2, srcState, destState );
+
+ mergeStateConds( destState, srcState );
+
+ outTransCopy( md, destState, srcState->outList.head );
+
+ doExpand( md, destState, expList1 );
+ doExpand( md, destState, expList2 );
+
+ doRemove( md, destState, expList1 );
+ doRemove( md, destState, expList2 );
+
+ expList1.empty();
+ expList2.empty();
+
+ /* Get its bits and final state status. */
+ destState->stateBits |= ( srcState->stateBits & ~SB_ISFINAL ); /* the final bit is handled by setFinState below */
+ if ( srcState->isFinState() )
+ setFinState( destState );
+
+ /* Draw in any properties of srcState into destState. */
+ if ( srcState == destState ) {
+ /* Duplicate the list to protect against write to source. The
+ * priorities sets are not copied in because that would have no
+ * effect. */
+ destState->epsilonTrans.append( EpsilonTrans( srcState->epsilonTrans ) );
+
+ /* Get all actions, duplicating to protect against write to source. */
+ destState->toStateActionTable.setActions(
+ ActionTable( srcState->toStateActionTable ) );
+ destState->fromStateActionTable.setActions(
+ ActionTable( srcState->fromStateActionTable ) );
+ destState->outActionTable.setActions( ActionTable( srcState->outActionTable ) );
+ destState->outCondSet.insert( ActionSet( srcState->outCondSet ) );
+ destState->errActionTable.setActions( ErrActionTable( srcState->errActionTable ) );
+ destState->eofActionTable.setActions( ActionTable( srcState->eofActionTable ) );
+ }
+ else {
+ /* Get the epsilons, out priorities. */
+ destState->epsilonTrans.append( srcState->epsilonTrans );
+ destState->outPriorTable.setPriors( srcState->outPriorTable );
+
+ /* Get all actions. */
+ destState->toStateActionTable.setActions( srcState->toStateActionTable );
+ destState->fromStateActionTable.setActions( srcState->fromStateActionTable );
+ destState->outActionTable.setActions( srcState->outActionTable );
+ destState->outCondSet.insert( srcState->outCondSet );
+ destState->errActionTable.setActions( srcState->errActionTable );
+ destState->eofActionTable.setActions( srcState->eofActionTable );
+ }
+}
+
+/* Process the fill list of a merge: every state on the list is merged with
+ * the set of states it stands for, then the state sets are released. */
+void FsmAp::fillInStates( MergeData &md )
+{
+    /* Merge any states that are awaiting merging. This will likely cause
+     * other states to be added to the stfill list; they are reached through
+     * alg.next, which is read only after the merge has run. */
+    for ( StateAp *pending = md.stfillHead; pending != 0;
+            pending = pending->alg.next )
+    {
+        StateSet &members = pending->stateDictEl->stateSet;
+        mergeStates( md, pending, members.data, members.length() );
+    }
+
+    /* Delete and reset the state set of every state on the fill list. */
+    for ( StateAp *filled = md.stfillHead; filled != 0;
+            filled = filled->alg.next )
+    {
+        delete filled->stateDictEl;
+        filled->stateDictEl = 0;
+    }
+
+    /* StateDict will still have its ptrs/size set but all of its elements
+     * will be deleted so we don't need to clean it up. */
+}
+/* Prepare the embedding of a single condition into destState. The out
+ * transitions and the state-cond list are walked in parallel. Ranges that
+ * have transitions but no conditions yet get a new state cond holding just
+ * condAction and an expansion to the value 1 of that space. Ranges that
+ * already have conditions are moved to the space that also contains
+ * condAction, and each existing value combination is expanded to the same
+ * combination with condAction's bit set. The rebuilt state-cond list
+ * replaces the one on destState.
+ */
+void FsmAp::findEmbedExpansions( ExpansionList &expansionList,
+ StateAp *destState, Action *condAction )
+{
+ StateCondList destList;
+ PairIter<TransAp, StateCond> transCond( destState->outList.head,
+ destState->stateCondList.head );
+ for ( ; !transCond.end(); transCond++ ) {
+ switch ( transCond.userState ) {
+ case RangeInS1: {
+ if ( transCond.s1Tel.lowKey <= keyOps->maxKey ) { /* only ranges starting inside the plain alphabet */
+ assert( transCond.s1Tel.highKey <= keyOps->maxKey );
+
+ /* Make a new state cond. */
+ StateCond *newStateCond = new StateCond( transCond.s1Tel.lowKey,
+ transCond.s1Tel.highKey );
+ newStateCond->condSpace = addCondSpace( CondSet( condAction ) );
+ destList.append( newStateCond );
+
+ /* Create the expansion. */
+ Expansion *expansion = new Expansion( transCond.s1Tel.lowKey,
+ transCond.s1Tel.highKey );
+ expansion->fromTrans = new TransAp(*transCond.s1Tel.trans);
+ expansion->fromTrans->fromState = 0;
+ expansion->fromTrans->toState = transCond.s1Tel.trans->toState;
+ expansion->fromCondSpace = 0;
+ expansion->fromVals = 0;
+ expansion->toCondSpace = newStateCond->condSpace;
+ expansion->toValsList.append( 1 );
+ #ifdef LOG_CONDS
+ logNewExpansion( expansion );
+ #endif
+ expansionList.append( expansion );
+ }
+ break;
+ }
+ case RangeInS2: {
+ /* Enhance state cond and find the expansion. */
+ StateCond *stateCond = transCond.s2Tel.trans;
+ stateCond->lowKey = transCond.s2Tel.lowKey;
+ stateCond->highKey = transCond.s2Tel.highKey;
+
+ CondSet &destCS = stateCond->condSpace->condSet;
+ long destLen = destCS.length();
+ CondSpace *fromCondSpace = stateCond->condSpace;
+
+ CondSet mergedCS = destCS;
+ mergedCS.insert( condAction );
+ CondSpace *toCondSpace = addCondSpace( mergedCS );
+ stateCond->condSpace = toCondSpace;
+ destList.append( stateCond );
+
+ /* Loop all values in the dest space. */
+ for ( long destVals = 0; destVals < (1 << destLen); destVals++ ) {
+ long basicVals = 0; /* destVals re-encoded with mergedCS bit positions */
+ for ( CondSet::Iter csi = destCS; csi.lte(); csi++ ) {
+ if ( destVals & (1 << csi.pos()) ) {
+ Action **cim = mergedCS.find( *csi );
+ long bitPos = (cim - mergedCS.data);
+ basicVals |= 1 << bitPos;
+ }
+ }
+
+ long targVals = basicVals;
+ Action **cim = mergedCS.find( condAction );
+ long bitPos = (cim - mergedCS.data);
+ targVals |= 1 << bitPos; /* additionally set the bit of the embedded condition */
+
+ LongVect expandToVals( targVals ); /* NOTE(review): assumes this Vector constructor yields a one-element vector holding targVals; confirm in vector.h */
+ findCondExpInTrans( expansionList, destState,
+ transCond.s2Tel.lowKey, transCond.s2Tel.highKey,
+ fromCondSpace, toCondSpace, destVals, expandToVals );
+ }
+ break;
+ }
+
+
+ case RangeOverlap:
+ case BreakS1:
+ case BreakS2:
+ assert( false );
+ break;
+ }
+ }
+
+ destState->stateCondList.transfer( destList );
+}
+
+/* Embed a condition into the out transitions of a state. This is the
+ * self-contained entry point: it owns the merge data, runs the worker
+ * overload, fills in any states created along the way and cleans up misfit
+ * states afterwards. */
+void FsmAp::embedCondition( StateAp *state, Action *condAction )
+{
+    MergeData md;
+
+    /* Turn on misfit accounting to possibly catch the old start state. */
+    setMisfitAccounting( true );
+
+    /* Worker. It builds and releases its own expansion list, so none is
+     * needed at this level. */
+    embedCondition( md, state, condAction );
+
+    /* Fill in any states that were newed up as combinations of others. */
+    fillInStates( md );
+
+    /* Remove the misfits and turn off misfit accounting. */
+    removeMisfits();
+    setMisfitAccounting( false );
+}
+
+/* Worker for condition embedding: find the expansions the condition
+ * requires on the state, apply them, remove the ranges they were taken from
+ * and release the expansion list. */
+void FsmAp::embedCondition( MergeData &md, StateAp *state, Action *condAction )
+{
+    ExpansionList pending;
+    findEmbedExpansions( pending, state, condAction );
+
+    doExpand( md, state, pending );
+    doRemove( md, state, pending );
+
+    pending.empty();
+}
+
+/* Report whether the machine matches exactly one single character. This is
+ * useful in validating ranges and machines to export. */
+bool FsmAp::checkSingleCharMachine()
+{
+    /* Shape of the graph: exactly two states, a non-final start state, a
+     * single final state without out transitions, and a single transition
+     * leaving the start state. The checks short-circuit in this order. */
+    const bool shapeOk =
+        stateList.length() == 2 &&
+        !startState->isFinState() &&
+        finStateSet.length() == 1 &&
+        finStateSet[0]->outList.length() == 0 &&
+        startState->outList.length() == 1;
+    if ( !shapeOk )
+        return false;
+
+    /* The single transition out of the start state must not be a range. */
+    TransAp *only = startState->outList.head;
+    return !( only->lowKey != only->highKey );
+}
+
diff --git a/contrib/tools/ragel5/ragel/fsmgraph.h b/contrib/tools/ragel5/ragel/fsmgraph.h
new file mode 100644
index 0000000000..062031c3aa
--- /dev/null
+++ b/contrib/tools/ragel5/ragel/fsmgraph.h
@@ -0,0 +1,1482 @@
+/*
+ * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _FSMGRAPH_H
+#define _FSMGRAPH_H
+
+#include <assert.h>
+#include <iostream>
+#include "common.h"
+#include "vector.h"
+#include "bstset.h"
+#include "compare.h"
+#include "avltree.h"
+#include "dlist.h"
+#include "bstmap.h"
+#include "sbstmap.h"
+#include "sbstset.h"
+#include "sbsttable.h"
+#include "avlset.h"
+#include "avlmap.h"
+#include "ragel.h"
+
+//#define LOG_CONDS
+
+/* Flags that control merging. */
+#define SB_GRAPH1 0x01
+#define SB_GRAPH2 0x02
+#define SB_BOTH 0x03
+#define SB_ISFINAL 0x04
+#define SB_ISMARKED 0x08
+#define SB_ONLIST 0x10
+
+using std::ostream;
+
+struct TransAp;
+struct StateAp;
+struct FsmAp;
+struct Action;
+struct LongestMatchPart;
+
+/* Link fields for a list of states, kept in their own struct so the list
+ * element can be accessed unambiguously. */
+struct FsmListEl
+{
+    StateAp *prev;
+    StateAp *next;
+};
+
+/* This is the marked index for a state pair. Used in minimization. It keeps
+ * track of whether or not the state pair is marked. Only the interface is
+ * declared here; the member functions are defined elsewhere.
+ * NOTE(review): the class holds a raw array pointer and declares a destructor
+ * but no copy constructor or assignment operator; it presumably must not be
+ * copied. Confirm against the implementation. */
+struct MarkIndex
+{
+ MarkIndex(int states);
+ ~MarkIndex();
+
+ void markPair(int state1, int state2);
+ bool isPairMarked(int state1, int state2);
+
+private:
+ int numStates;
+ bool *array;
+};
+
+extern KeyOps *keyOps;
+
+/* Transition Action Element. */
+typedef SBstMapEl< int, Action* > ActionTableEl;
+
+/* Nodes in the tree that use this action. */
+struct NameInst;
+struct InlineList;
+typedef Vector<NameInst*> ActionRefs;
+
+/* Element in list of actions. Contains the string for the code to execute.
+ * An action is a list element and a tree element at the same time, and its
+ * name serves as the tree key. All reference counters start at zero and the
+ * action id starts at -1. */
+struct Action
+:
+ public DListEl<Action>,
+ public AvlTreeEl<Action>
+{
+public:
+
+ Action( const InputLoc &loc, const char *name, InlineList *inlineList, int condId )
+ :
+ loc(loc),
+ name(name),
+ inlineList(inlineList),
+ actionId(-1),
+ numTransRefs(0),
+ numToStateRefs(0),
+ numFromStateRefs(0),
+ numEofRefs(0),
+ numCondRefs(0),
+ anyCall(false),
+ isLmAction(false),
+ condId(condId)
+ {
+ }
+
+ /* Key for action dictionary. */
+ const char *getKey() const { return name; }
+
+ /* Data collected during parse. */
+ InputLoc loc;
+ const char *name;
+ InlineList *inlineList;
+ int actionId;
+
+ /* Write the action's name to out, or its line:col location when the
+ * action has no name. */
+ void actionName( ostream &out )
+ {
+ if ( name != 0 )
+ out << name;
+ else
+ out << loc.line << ":" << loc.col;
+ }
+
+ /* Places in the input text that reference the action. */
+ ActionRefs actionRefs;
+
+ /* Number of references in the final machine. The sum covers transition,
+ * to-state, from-state and eof references; numCondRefs is not part of
+ * it. */
+ int numRefs()
+ { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; }
+ int numTransRefs;
+ int numToStateRefs;
+ int numFromStateRefs;
+ int numEofRefs;
+ int numCondRefs;
+ bool anyCall;
+
+ bool isLmAction;
+ int condId; /* ordering key used by CmpCondId */
+};
+
+/* Three-way comparison of two condition actions by their condId. */
+struct CmpCondId
+{
+    static inline int compare( const Action *cond1, const Action *cond2 )
+    {
+        const int lhs = cond1->condId;
+        const int rhs = cond2->condId;
+        return ( lhs < rhs ) ? -1 : ( ( lhs > rhs ) ? 1 : 0 );
+    }
+};
+
+/* A list of actions. */
+typedef DList<Action> ActionList;
+typedef AvlTree<Action, char *, CmpStr> ActionDict;
+
+/* Structure for reverse action mapping. Pairs a name with an input
+ * location. */
+struct RevActionMapEl
+{
+ char *name;
+ InputLoc location;
+};
+
+
+/* Transition Action Table. A map from an integer ordering to the action
+ * embedded at that ordering. The members below are only declared here. */
+struct ActionTable
+ : public SBstMap< int, Action*, CmpOrd<int> >
+{
+ void setAction( int ordering, Action *action );
+ void setActions( int *orderings, Action **actions, int nActs );
+ void setActions( const ActionTable &other );
+
+ bool hasAction( Action *action );
+};
+
+typedef SBstSet< Action*, CmpOrd<Action*> > ActionSet;
+typedef CmpSTable< Action*, CmpOrd<Action*> > CmpActionSet;
+
+/* Longest-match transition action element. */
+typedef SBstMapEl< int, LongestMatchPart* > LmActionTableEl;
+
+/* Longest-match action table. A map from an integer ordering to a
+ * longest-match part. The members below are only declared here. */
+struct LmActionTable
+ : public SBstMap< int, LongestMatchPart*, CmpOrd<int> >
+{
+ void setAction( int ordering, LongestMatchPart *action );
+ void setActions( const LmActionTable &other );
+};
+
+/* Three-way comparison of whole action table elements: the key decides
+ * first, the value (action pointer) breaks ties. */
+struct CmpActionTableEl
+{
+    static int compare( const ActionTableEl &action1,
+            const ActionTableEl &action2 )
+    {
+        const int byKey = ( action1.key > action2.key ) -
+                ( action1.key < action2.key );
+        if ( byKey != 0 )
+            return byKey;
+        return ( action1.value > action2.value ) -
+                ( action1.value < action2.value );
+    }
+};
+
+/* Compare for ActionTable. */
+typedef CmpSTable< ActionTableEl, CmpActionTableEl > CmpActionTable;
+
+/* Three-way comparison of whole longest-match action table elements: the
+ * key decides first, the value pointer breaks ties. */
+struct CmpLmActionTableEl
+{
+    static int compare( const LmActionTableEl &lmAction1,
+            const LmActionTableEl &lmAction2 )
+    {
+        if ( lmAction1.key < lmAction2.key )
+            return -1;
+        if ( lmAction1.key > lmAction2.key )
+            return 1;
+
+        /* Keys are equal, fall back to the value pointers. */
+        if ( lmAction1.value < lmAction2.value )
+            return -1;
+        return ( lmAction1.value > lmAction2.value ) ? 1 : 0;
+    }
+};
+
+/* Compare for LmActionTable. */
+typedef CmpSTable< LmActionTableEl, CmpLmActionTableEl > CmpLmActionTable;
+
+/* Action table element for error action tables. Adds the encoding of the
+ * transfer point. Note that the constructor takes the action first and the
+ * ordering second, while the table key is the ordering. */
+struct ErrActionTableEl
+{
+ ErrActionTableEl( Action *action, int ordering, int transferPoint )
+ : ordering(ordering), action(action), transferPoint(transferPoint) { }
+
+ /* Ordering and id of the action embedding. */
+ int ordering;
+ Action *action;
+
+ /* Id of the point of transfer from the error action table to transitions
+ * and eofActionTable. */
+ int transferPoint;
+
+ int getKey() const { return ordering; }
+};
+/* Error action table. A table of ErrActionTableEl keyed by the integer
+ * ordering. The members below are only declared here.
+ */
+struct ErrActionTable
+ : public SBstTable< ErrActionTableEl, int, CmpOrd<int> >
+{
+ void setAction( int ordering, Action *action, int transferPoint );
+ void setActions( const ErrActionTable &other );
+};
+
+/* Compare of an error action table element (key & value). */
+struct CmpErrActionTableEl
+{
+ static int compare( const ErrActionTableEl &action1,
+ const ErrActionTableEl &action2 )
+ {
+ if ( action1.ordering < action2.ordering )
+ return -1;
+ else if ( action1.ordering > action2.ordering )
+ return 1;
+ else if ( action1.action < action2.action )
+ return -1;
+ else if ( action1.action > action2.action )
+ return 1;
+ else if ( action1.transferPoint < action2.transferPoint )
+ return -1;
+ else if ( action1.transferPoint > action2.transferPoint )
+ return 1;
+ return 0;
+ }
+};
+
+/* Compare for ErrActionTable. */
+typedef CmpSTable< ErrActionTableEl, CmpErrActionTableEl > CmpErrActionTable;
+
+
+/* Describes a priority, shared among PriorEls.
+ * Holds the priority key and the priority value assigned to it. */
+struct PriorDesc
+{
+ int key;
+ int priority;
+};
+
+/* Element in the arrays of priorities for transitions and states. Ordering
+ * is unique among instantiations of machines, desc is shared. */
+struct PriorEl
+{
+ PriorEl( int ordering, PriorDesc *desc )
+ : ordering(ordering), desc(desc) { }
+
+ int ordering;
+ PriorDesc *desc; /* shared descriptor; the struct has no destructor and never frees it */
+};
+
+/* Three-way comparison of priority elements by the key of their priority
+ * descriptor. */
+struct PriorElCmp
+{
+    static inline int compare( const PriorEl &pel1, const PriorEl &pel2 )
+    {
+        const int key1 = pel1.desc->key;
+        const int key2 = pel2.desc->key;
+        if ( key1 < key2 )
+            return -1;
+        return ( key1 > key2 ) ? 1 : 0;
+    }
+};
+
+
+/* Priority Table. A set of PriorEl ordered by PriorElCmp, i.e. by the key
+ * of the priority descriptor. The members below are only declared here. */
+struct PriorTable
+ : public SBstSet< PriorEl, PriorElCmp >
+{
+ void setPrior( int ordering, PriorDesc *desc );
+ void setPriors( const PriorTable &other );
+};
+
+/* Three-way comparison of prior table elements for distinguishing state
+ * data. The descriptor pointer decides first, the ordering breaks ties. */
+struct CmpPriorEl
+{
+    static inline int compare( const PriorEl &pel1, const PriorEl &pel2 )
+    {
+        const int byDesc = ( pel1.desc > pel2.desc ) - ( pel1.desc < pel2.desc );
+        if ( byDesc != 0 )
+            return byDesc;
+        return ( pel1.ordering > pel2.ordering ) -
+                ( pel1.ordering < pel2.ordering );
+    }
+};
+
+/* Compare of PriorTable distinguishing state data. Using a compare of the
+ * pointers is a little more strict than it needs to be. It requires that
+ * priority tables have the exact same set of priority assignment operators
+ * (from the input lang) to be considered equal.
+ *
+ * Really only key-value pairs need be tested and ordering be merged. However
+ * this would require that in the fusing of states, priority descriptors be
+ * chosen for the new fused state based on priority. Since the out transition
+ * lists and ranges aren't necessarily going to line up, this is more work for
+ * little gain. Final compression resets all priorities first, so this would
+ * only be useful for compression at every operator, which is only an
+ * undocumented test feature.
+ */
+typedef CmpSTable<PriorEl, CmpPriorEl> CmpPriorTable;
+
+/* Plain action list that imposes no ordering. */
+typedef Vector<int> TransFuncList;
+
+/* Comparison for TransFuncList. */
+typedef CmpTable< int, CmpOrd<int> > TransFuncListCompare;
+
+/* Transition class that implements actions and priorities. A transition
+ * covers the key range lowKey..highKey and is linked into the out list of
+ * its from-state (prev/next) and the in list of its to-state
+ * (ilprev/ilnext). */
+struct TransAp
+{
+    /* A fresh transition is attached to no state and linked into no list.
+     * The keys are left to the default constructor of Key. */
+    TransAp()
+    :
+        fromState(0), toState(0),
+        prev(0), next(0),
+        ilprev(0), ilnext(0)
+    {}
+
+    /* Copies the keys, the action table and the priority table. The copy is
+     * detached: it belongs to no state and is linked into no list. Copying a
+     * transition that carries longest-match actions is not supported. */
+    TransAp( const TransAp &other )
+    :
+        lowKey(other.lowKey),
+        highKey(other.highKey),
+        fromState(0), toState(0),
+        prev(0), next(0),
+        ilprev(0), ilnext(0),
+        actionTable(other.actionTable),
+        priorTable(other.priorTable)
+    {
+        assert( lmActionTable.length() == 0 && other.lmActionTable.length() == 0 );
+    }
+
+    Key lowKey, highKey;
+    StateAp *fromState;
+    StateAp *toState;
+
+    /* Pointers for outlist. */
+    TransAp *prev, *next;
+
+    /* Pointers for in-list. */
+    TransAp *ilprev, *ilnext;
+
+    /* The function table and priority for the transition. */
+    ActionTable actionTable;
+    PriorTable priorTable;
+
+    LmActionTable lmActionTable;
+};
+
+/* In transition list. Like DList except only has head pointers, which is all
+ * that is required. Insertion and deletion is handled by the graph. This
+ * class provides the iterator of a single list. The list is chained through
+ * the ilprev/ilnext pointers of TransAp. */
+struct TransInList
+{
+ TransInList() : head(0) { }
+
+ TransAp *head;
+
+ struct Iter
+ {
+ /* Default construct. */
+ Iter() : ptr(0) { }
+
+ /* Construct, assign from a list. */
+ Iter( const TransInList &il ) : ptr(il.head) { }
+ Iter &operator=( const TransInList &dl ) { ptr = dl.head; return *this; }
+
+ /* lte() is true while the iterator points at an element, end() is true
+ * once it has run off the list. */
+ bool lte() const { return ptr != 0; }
+ bool end() const { return ptr == 0; }
+
+ /* At the first, last element. */
+ bool first() const { return ptr && ptr->ilprev == 0; }
+ bool last() const { return ptr && ptr->ilnext == 0; }
+
+ /* Cast, dereference, arrow ops. */
+ operator TransAp*() const { return ptr; }
+ TransAp &operator *() const { return *ptr; }
+ TransAp *operator->() const { return ptr; }
+
+ /* Increment, decrement. Postfix only; both dereference ptr, so they
+ * must not be used on an iterator that is at the end. */
+ inline void operator++(int) { ptr = ptr->ilnext; }
+ inline void operator--(int) { ptr = ptr->ilprev; }
+
+ /* The iterator is simply a pointer. */
+ TransAp *ptr;
+ };
+};
+
+typedef DList<TransAp> TransList;
+
+/* Set of states, list of states. */
+typedef BstSet<StateAp*> StateSet;
+typedef DList<StateAp> StateList;
+
+/* An element in a state dict. Maps the set of states in stateSet, which is
+ * also the tree key, to the state in targState. */
+struct StateDictEl
+:
+    public AvlTreeEl<StateDictEl>
+{
+    /* Copies the state set. The target state starts out null and is
+     * expected to be assigned by whoever creates the element. */
+    StateDictEl(const StateSet &stateSet)
+        : stateSet(stateSet), targState(0) { }
+
+    const StateSet &getKey() { return stateSet; }
+    StateSet stateSet;
+    StateAp *targState;
+};
+
+/* Dictionary mapping a set of states to a target state. */
+typedef AvlTree< StateDictEl, StateSet, CmpTable<StateAp*> > StateDict;
+
+/* Data needed for a merge operation: the dictionary of state sets and the
+ * head and tail of the list of states that still need to be filled in. */
+struct MergeData
+{
+ MergeData()
+ : stfillHead(0), stfillTail(0) { }
+
+ StateDict stateDict;
+
+ StateAp *stfillHead; /* fill list head; FsmAp::fillInStates follows it through alg.next */
+ StateAp *stfillTail;
+
+ void fillListAppend( StateAp *state );
+};
+/* A key range paired with a transition pointer.
+ * NOTE(review): the default constructor initializes nothing and the
+ * two-argument constructor leaves value unset; callers must assign these
+ * members before reading them.
+ */
+struct TransEl
+{
+ /* Constructors. */
+ TransEl() { }
+ TransEl( Key lowKey, Key highKey )
+ : lowKey(lowKey), highKey(highKey) { }
+ TransEl( Key lowKey, Key highKey, TransAp *value )
+ : lowKey(lowKey), highKey(highKey), value(value) { }
+
+ Key lowKey, highKey;
+ TransAp *value;
+};
+
+/* Three-way comparison of two keys using the relational operators of Key. */
+struct CmpKey
+{
+    static int compare( const Key key1, const Key key2 )
+    {
+        return ( key1 < key2 ) ? -1 : ( ( key1 > key2 ) ? 1 : 0 );
+    }
+};
+
+/* Vector based set of key items. */
+typedef BstSet<Key, CmpKey> KeySet;
+/* A partition of states: a list of states plus an active flag, linked to
+ * other partitions through prev/next. StateAp::alg.partition points at the
+ * partition a state is in during minimization by partitioning.
+ * NOTE(review): prev and next are not initialized by the constructor.
+ */
+struct MinPartition
+{
+ MinPartition() : active(false) { }
+
+ StateList list;
+ bool active;
+
+ MinPartition *prev, *next;
+};
+
+/* Epsilon transition stored in a state. Specifies the target */
+typedef Vector<int> EpsilonTrans;
+
+/* Element of the list of states that are to be drawn into this. Holds the
+ * target state and a flag telling whether the draw-in is a leaving one. */
+struct EptVectEl
+{
+ EptVectEl( StateAp *targ, bool leaving )
+ : targ(targ), leaving(leaving) { }
+
+ StateAp *targ;
+ bool leaving;
+};
+typedef Vector<EptVectEl> EptVect;
+
+/* Set of entry ids that go into this state. */
+typedef BstSet<int> EntryIdSet;
+
+/* Set of longest match items that may be active in a given state. */
+typedef BstSet<LongestMatchPart*> LmItemSet;
+
+/* Conditions. */
+typedef BstSet< Action*, CmpCondId > CondSet;
+typedef CmpTable< Action*, CmpCondId > CmpCondSet;
+/* A condition space: a set of conditions, which is also the tree key,
+ * together with the base key of the key region assigned to the space and
+ * an id.
+ * NOTE(review): baseKey and condSpaceId are not set by the constructor;
+ * presumably addCondSpace() assigns them. Confirm.
+ */
+struct CondSpace
+ : public AvlTreeEl<CondSpace>
+{
+ CondSpace( const CondSet &condSet )
+ : condSet(condSet) {}
+
+ const CondSet &getKey() { return condSet; }
+
+ CondSet condSet;
+ Key baseKey;
+ long condSpaceId;
+};
+
+typedef Vector<CondSpace*> CondSpaceVect;
+
+typedef AvlTree<CondSpace, CondSet, CmpCondSet> CondSpaceMap;
+
+/* Associates a key range of a state with the condition space that applies
+ * to it. Elements are kept in a StateCondList, linked through prev/next. */
+struct StateCond
+{
+    /* A new element covers lowKey..highKey, has no condition space yet and
+     * is linked into no list. */
+    StateCond( Key lowKey, Key highKey ) :
+        lowKey(lowKey), highKey(highKey),
+        condSpace(0), prev(0), next(0) {}
+
+    Key lowKey;
+    Key highKey;
+    CondSpace *condSpace;
+
+    StateCond *prev, *next;
+};
+
+typedef DList<StateCond> StateCondList;
+typedef Vector<long> LongVect;
+
+/* One pending expansion of a transition range into a condition space. It
+ * records the range in the plain alphabet (lowKey..highKey), a private copy
+ * of the transition being expanded, the condition space and values the
+ * range currently lives in (fromCondSpace is null when it has none) and the
+ * space and list of values it is to be expanded into.
+ *
+ * The expansion owns fromTrans and deletes it on destruction. No copy
+ * constructor is declared, so copying an Expansion would leave two owners
+ * of the same transition; expansions are meant to be handled by pointer. */
+struct Expansion
+{
+    Expansion( Key lowKey, Key highKey ) :
+        lowKey(lowKey), highKey(highKey),
+        fromTrans(0), fromCondSpace(0), fromVals(0),
+        toCondSpace(0),
+        prev(0), next(0) {}
+
+    ~Expansion()
+    {
+        /* Deleting a null pointer is a no-op, so no guard is needed. */
+        delete fromTrans;
+    }
+
+    Key lowKey;
+    Key highKey;
+
+    TransAp *fromTrans;
+    CondSpace *fromCondSpace;
+    long fromVals;
+
+    CondSpace *toCondSpace;
+    LongVect toValsList;
+
+    Expansion *prev, *next;
+};
+
+typedef DList<Expansion> ExpansionList;
+/* A key range to be removed from an out list. It has the lowKey, highKey
+ * and next members a PairIter list item needs, so FsmAp::doRemove can pair
+ * a single Removal with a transition list.
+ */
+struct Removal
+{
+ Key lowKey;
+ Key highKey;
+
+ Removal *next;
+};
+/* Bookkeeping for condition spaces: the next condition key, which starts at
+ * zero, and the map of condition spaces keyed by their condition sets.
+ */
+struct CondData
+{
+ CondData() : nextCondKey(0) {}
+
+ /* Condition info. */
+ Key nextCondKey;
+
+ CondSpaceMap condSpaceMap;
+};
+
+extern CondData *condData;
+
+/* State class that implements actions and priorities. Besides the out and
+ * in transition lists it carries condition info, scratch data for the graph
+ * algorithms and the action and priority tables attached to the state. */
+struct StateAp
+{
+ StateAp();
+ StateAp(const StateAp &other);
+ ~StateAp();
+
+ /* Is the state final? */
+ bool isFinState() { return stateBits & SB_ISFINAL; }
+
+ /* Out transition list. */
+ TransList outList;
+
+ /* In transition Lists. */
+ TransInList inList;
+
+ /* Entry points into the state. */
+ EntryIdSet entryIds;
+
+ /* Epsilon transitions. */
+ EpsilonTrans epsilonTrans;
+
+ /* Condition info. */
+ StateCondList stateCondList;
+
+ /* Number of in transitions from states other than ourselves. */
+ int foreignInTrans;
+
+ /* Temporary data for various algorithms. The members share storage, so
+ * only one of them is meaningful at any time. */
+ union {
+ /* When duplicating the fsm we need to map each
+ * state to the new state representing it. */
+ StateAp *stateMap;
+
+ /* When minimizing machines by partitioning, this maps to the group
+ * the state is in. */
+ MinPartition *partition;
+
+ /* When merging states (state machine operations) this next pointer is
+ * used for the list of states that need to be filled in. */
+ StateAp *next;
+
+ /* Identification for printing and stable minimization. */
+ int stateNum;
+
+ } alg;
+
+ /* Data used in epsilon operation, maybe fit into alg? */
+ StateAp *isolatedShadow;
+ int owningGraph;
+
+ /* A pointer to a dict element that contains the set of states this state
+ * represents. This cannot go into alg, because alg.next is used during
+ * the merging process. */
+ StateDictEl *stateDictEl;
+
+ /* When drawing epsilon transitions, holds the list of states to merge
+ * with. */
+ EptVect *eptVect;
+
+ /* Bits controlling the behaviour of the state during collapsing to dfa. */
+ int stateBits;
+
+ /* State list elements. */
+ StateAp *next, *prev;
+
+ /*
+ * Priority and Action data.
+ */
+
+ /* Out priorities transferred to out transitions. */
+ PriorTable outPriorTable;
+
+ /* The following two action tables are distinguished by when they run.
+ * The toState actions are executed immediately after the transition
+ * actions of incoming transitions and the current character will be the
+ * same as the one available then. The fromState actions are executed
+ * immediately before the transition actions of outgoing transitions and
+ * the current character is the same as the one available then. */
+
+ /* Actions to execute upon entering into a state. */
+ ActionTable toStateActionTable;
+
+ /* Actions to execute when going from the state to the transition. */
+ ActionTable fromStateActionTable;
+
+ /* Actions to add to any future transitions that leave via this state. */
+ ActionTable outActionTable;
+
+ /* Conditions to add to any future transitions that leave via this state. */
+ ActionSet outCondSet;
+
+ /* Error action tables. */
+ ErrActionTable errActionTable;
+
+ /* Actions to execute on eof. */
+ ActionTable eofActionTable;
+
+ /* Set of longest match items that may be active in this state. */
+ LmItemSet lmItemSet;
+};
+/* Cursor over one of the two lists walked by PairIter. It caches the key
+ * range and the successor of the current element, so the range can be
+ * adjusted and the current element replaced without losing the position.
+ * ListItem must provide next, lowKey and highKey members.
+ */
+template <class ListItem> struct NextTrans
+{
+ Key lowKey, highKey;
+ ListItem *trans;
+ ListItem *next;
+
+ void load() { /* refresh the cache from trans; the keys are left untouched when trans is null */
+ if ( trans == 0 )
+ next = 0;
+ else {
+ next = trans->next;
+ lowKey = trans->lowKey;
+ highKey = trans->highKey;
+ }
+ }
+
+ void set( ListItem *t ) { /* position the cursor on t */
+ trans = t;
+ load();
+ }
+
+ void increment() { /* advance to the cached successor */
+ trans = next;
+ load();
+ }
+};
+
+
+/* Encodes the iterator states that are meaningful to the user of the
+ * iterator. PairIter publishes one of these in userState each time it
+ * stops. */
+enum PairIterUserState
+{
+ RangeInS1, RangeInS2, /* callers in this file read only s1Tel, respectively s2Tel, in these states */
+ RangeOverlap, /* callers in this file read both s1Tel and s2Tel */
+ BreakS1, BreakS2 /* callers in this file may replace s1Tel.trans on BreakS1 */
+};
+/* Iterator that walks two key-ordered lists of ranges in parallel. Each
+ * time it stops, userState says how the current ranges of the two lists
+ * relate and s1Tel/s2Tel describe them. The two list item types may differ;
+ * both are used through NextTrans and therefore need next, lowKey and
+ * highKey members. Iteration is driven by findNext(), which is written as a
+ * co-routine that resumes according to itState.
+ */
+template <class ListItem1, class ListItem2 = ListItem1> struct PairIter
+{
+ /* Encodes the different states that an fsm iterator can be in. */
+ enum IterState {
+ Begin,
+ ConsumeS1Range, ConsumeS2Range,
+ OnlyInS1Range, OnlyInS2Range,
+ S1SticksOut, S1SticksOutBreak,
+ S2SticksOut, S2SticksOutBreak,
+ S1DragsBehind, S1DragsBehindBreak,
+ S2DragsBehind, S2DragsBehindBreak,
+ ExactOverlap, End
+ };
+
+ PairIter( ListItem1 *list1, ListItem2 *list2 );
+
+ /* Query iterator. */
+ bool lte() { return itState != End; }
+ bool end() { return itState == End; }
+ void operator++(int) { findNext(); } /* both increment forms advance and return nothing */
+ void operator++() { findNext(); }
+
+ /* Iterator state. */
+ ListItem1 *list1;
+ ListItem2 *list2;
+ IterState itState;
+ PairIterUserState userState;
+
+ NextTrans<ListItem1> s1Tel;
+ NextTrans<ListItem2> s2Tel;
+ Key bottomLow, bottomHigh;
+ ListItem1 *bottomTrans1;
+ ListItem2 *bottomTrans2;
+
+private:
+ void findNext();
+};
+
+/* Init the iterator by advancing to the first item. */
+template <class ListItem1, class ListItem2> PairIter<ListItem1, ListItem2>::PairIter(
+ ListItem1 *list1, ListItem2 *list2 )
+:
+ list1(list1),
+ list2(list2),
+ itState(Begin) /* Makes the findNext() call below start from entryBegin. */
+{
+ findNext(); /* Leaves itState == End right away when both lists are empty. */
+}
+
+/* Return and re-entry for the co-routine iterators: records the resume state,
+ * returns, and defines the re-entry label. ALWAYS use inside of a block. */
+#define CO_RETURN(label) \
+ itState = label; \
+ return; \
+ entry##label: backIn = true
+
+/* Like CO_RETURN, but also stores uState in userState before returning.
+ * This should ALWAYS be used inside of a block. */
+#define CO_RETURN2(label, uState) \
+ itState = label; \
+ userState = uState; \
+ return; \
+ entry##label: backIn = true
+
+/* Advance to the next transition. When returns, trans points to the next
+ * transition, unless there are no more, in which case end() returns true. */
+template <class ListItem1, class ListItem2> void PairIter<ListItem1, ListItem2>::findNext()
+{
+ /* This variable is used in dummy statements that follow the entry
+ * goto labels. The compiler needs some statement to follow the label. */
+ bool backIn;
+
+ /* Jump into the iterator routine based on the iterator state. */
+ switch ( itState ) {
+ case Begin: goto entryBegin;
+ case ConsumeS1Range: goto entryConsumeS1Range;
+ case ConsumeS2Range: goto entryConsumeS2Range;
+ case OnlyInS1Range: goto entryOnlyInS1Range;
+ case OnlyInS2Range: goto entryOnlyInS2Range;
+ case S1SticksOut: goto entryS1SticksOut;
+ case S1SticksOutBreak: goto entryS1SticksOutBreak;
+ case S2SticksOut: goto entryS2SticksOut;
+ case S2SticksOutBreak: goto entryS2SticksOutBreak;
+ case S1DragsBehind: goto entryS1DragsBehind;
+ case S1DragsBehindBreak: goto entryS1DragsBehindBreak;
+ case S2DragsBehind: goto entryS2DragsBehind;
+ case S2DragsBehindBreak: goto entryS2DragsBehindBreak;
+ case ExactOverlap: goto entryExactOverlap;
+ case End: goto entryEnd;
+ }
+
+entryBegin:
+ /* Set up the next structs at the head of the transition lists. */
+ s1Tel.set( list1 );
+ s2Tel.set( list2 );
+
+ /* Concurrently scan both out ranges. */
+ while ( true ) {
+ if ( s1Tel.trans == 0 ) {
+ /* We are at the end of state1's ranges. Process the rest of
+ * state2's ranges. */
+ while ( s2Tel.trans != 0 ) {
+ /* Range is only in s2. */
+ CO_RETURN2( ConsumeS2Range, RangeInS2 );
+ s2Tel.increment();
+ }
+ break;
+ }
+ else if ( s2Tel.trans == 0 ) {
+ /* We are at the end of state2's ranges. Process the rest of
+ * state1's ranges. */
+ while ( s1Tel.trans != 0 ) {
+ /* Range is only in s1. */
+ CO_RETURN2( ConsumeS1Range, RangeInS1 );
+ s1Tel.increment();
+ }
+ break;
+ }
+ /* Both state1's and state2's transition elements are good.
+ * The signature of no overlap is a back key being in front of a
+ * front key. */
+ else if ( s1Tel.highKey < s2Tel.lowKey ) {
+ /* A range exists in state1 that does not overlap with state2. */
+ CO_RETURN2( OnlyInS1Range, RangeInS1 );
+ s1Tel.increment();
+ }
+ else if ( s2Tel.highKey < s1Tel.lowKey ) {
+ /* A range exists in state2 that does not overlap with state1. */
+ CO_RETURN2( OnlyInS2Range, RangeInS2 );
+ s2Tel.increment();
+ }
+ /* There is overlap, must mix the ranges in some way. */
+ else if ( s1Tel.lowKey < s2Tel.lowKey ) {
+ /* Range from state1 sticks out front. Must break it into
+ * non-overlapping and overlapping segments. */
+ bottomLow = s2Tel.lowKey;
+ bottomHigh = s1Tel.highKey;
+ s1Tel.highKey = s2Tel.lowKey;
+ s1Tel.highKey.decrement();
+ bottomTrans1 = s1Tel.trans;
+
+ /* Notify the caller that we are breaking s1. This gives them a
+ * chance to duplicate s1Tel[0,1].value. */
+ CO_RETURN2( S1SticksOutBreak, BreakS1 );
+
+ /* Broken off range is only in s1. */
+ CO_RETURN2( S1SticksOut, RangeInS1 );
+
+ /* Advance over the part sticking out front. */
+ s1Tel.lowKey = bottomLow;
+ s1Tel.highKey = bottomHigh;
+ s1Tel.trans = bottomTrans1;
+ }
+ else if ( s2Tel.lowKey < s1Tel.lowKey ) {
+ /* Range from state2 sticks out front. Must break it into
+ * non-overlapping and overlapping segments. */
+ bottomLow = s1Tel.lowKey;
+ bottomHigh = s2Tel.highKey;
+ s2Tel.highKey = s1Tel.lowKey;
+ s2Tel.highKey.decrement();
+ bottomTrans2 = s2Tel.trans;
+
+ /* Notify the caller that we are breaking s2. This gives them a
+ * chance to duplicate s2Tel[0,1].value. */
+ CO_RETURN2( S2SticksOutBreak, BreakS2 );
+
+ /* Broken off range is only in s2. */
+ CO_RETURN2( S2SticksOut, RangeInS2 );
+
+ /* Advance over the part sticking out front. */
+ s2Tel.lowKey = bottomLow;
+ s2Tel.highKey = bottomHigh;
+ s2Tel.trans = bottomTrans2;
+ }
+ /* Low ends are even. Are the high ends even? */
+ else if ( s1Tel.highKey < s2Tel.highKey ) {
+ /* Range from state2 goes longer than the range from state1. We
+ * must break the range from state2 into an evenly overlapping
+ * segment. */
+ bottomLow = s1Tel.highKey;
+ bottomLow.increment();
+ bottomHigh = s2Tel.highKey;
+ s2Tel.highKey = s1Tel.highKey;
+ bottomTrans2 = s2Tel.trans;
+
+ /* Notify the caller that we are breaking s2. This gives them a
+ * chance to duplicate s2Tel[0,1].value. */
+ CO_RETURN2( S2DragsBehindBreak, BreakS2 );
+
+ /* Breaking s2 produces exact overlap. */
+ CO_RETURN2( S2DragsBehind, RangeOverlap );
+
+ /* Advance over the front we just broke off of range 2. */
+ s2Tel.lowKey = bottomLow;
+ s2Tel.highKey = bottomHigh;
+ s2Tel.trans = bottomTrans2;
+
+ /* Advance over the entire s1Tel. We have consumed it. */
+ s1Tel.increment();
+ }
+ else if ( s2Tel.highKey < s1Tel.highKey ) {
+ /* Range from state1 goes longer than the range from state2. We
+ * must break the range from state1 into an evenly overlapping
+ * segment. */
+ bottomLow = s2Tel.highKey;
+ bottomLow.increment();
+ bottomHigh = s1Tel.highKey;
+ s1Tel.highKey = s2Tel.highKey;
+ bottomTrans1 = s1Tel.trans;
+
+ /* Notify the caller that we are breaking s1. This gives them a
+ * chance to duplicate s1Tel[0,1].value. */
+ CO_RETURN2( S1DragsBehindBreak, BreakS1 );
+
+ /* Breaking s1 produces exact overlap. */
+ CO_RETURN2( S1DragsBehind, RangeOverlap );
+
+ /* Advance over the front we just broke off of range 1. */
+ s1Tel.lowKey = bottomLow;
+ s1Tel.highKey = bottomHigh;
+ s1Tel.trans = bottomTrans1;
+
+ /* Advance over the entire s2Tel. We have consumed it. */
+ s2Tel.increment();
+ }
+ else {
+ /* There is an exact overlap. */
+ CO_RETURN2( ExactOverlap, RangeOverlap );
+
+ s1Tel.increment();
+ s2Tel.increment();
+ }
+ }
+
+ /* Done, go into end state. */
+ CO_RETURN( End );
+}
+
+
+/* Compare lists of epsilon transitions. Entries are name ids of targets. */
+typedef CmpTable< int, CmpOrd<int> > CmpEpsilonTrans;
+
+/* Compare class for the approximate minimization. */
+class ApproxCompare
+{
+public:
+ ApproxCompare() { }
+ int compare( const StateAp *pState1, const StateAp *pState2 ); /* Defined elsewhere. NOTE(review): presumably a three-way ordering result -- confirm. */
+};
+
+/* Compare class for the initial partitioning of a partition minimization. */
+class InitPartitionCompare
+{
+public:
+ InitPartitionCompare() { }
+ int compare( const StateAp *pState1, const StateAp *pState2 ); /* minimizePartition1 starts a new partition when this is negative for adjacent sorted states. */
+};
+
+/* Compare class for the regular partitioning of a partition minimization. */
+class PartitionCompare
+{
+public:
+ PartitionCompare() { }
+ int compare( const StateAp *pState1, const StateAp *pState2 ); /* partitionRound and splitCandidates split where this is negative for adjacent sorted states. */
+};
+
+/* Compare class for a minimization that marks pairs. Provides the shouldMark
+ * routine. */
+class MarkCompare
+{
+public:
+ MarkCompare() { }
+ bool shouldMark( MarkIndex &markIndex, const StateAp *pState1, /* Defined elsewhere. NOTE(review): presumably true when the pair must be marked -- confirm. */
+ const StateAp *pState2 );
+};
+
+/* List of partitions. */
+typedef DList< MinPartition > PartitionList;
+
+/* List of transitions out of a state. */
+typedef Vector<TransEl> TransListVect;
+
+/* Entry point map used for keeping track of entry points in a machine. */
+typedef BstSet< int > EntryIdSet;
+typedef BstMapEl< int, StateAp* > EntryMapEl;
+typedef BstMap< int, StateAp* > EntryMap;
+typedef Vector<EntryMapEl> EntryMapBase;
+
+/* Graph class that implements actions and priorities. */
+struct FsmAp
+{
+ /* Constructors/Destructors. */
+ FsmAp( );
+ FsmAp( const FsmAp &graph );
+ ~FsmAp();
+
+ /* The list of states. */
+ StateList stateList;
+ StateList misfitList;
+
+ /* The map of entry points. */
+ EntryMap entryPoints;
+
+ /* The start state. */
+ StateAp *startState;
+
+ /* Error state, possibly created only when the final machine has been
+ * created and the XML machine is about to be written. No transitions
+ * point to this state. */
+ StateAp *errState;
+
+ /* The set of final states. */
+ StateSet finStateSet;
+
+ /* Misfit Accounting. Are misfits put on a separate list? */
+ bool misfitAccounting;
+
+ /*
+ * Transition actions and priorities.
+ */
+
+ /* Set priorities on transitions. */
+ void startFsmPrior( int ordering, PriorDesc *prior );
+ void allTransPrior( int ordering, PriorDesc *prior );
+ void finishFsmPrior( int ordering, PriorDesc *prior );
+ void leaveFsmPrior( int ordering, PriorDesc *prior );
+
+ /* Action setting support. */
+ void transferErrorActions( StateAp *state, int transferPoint );
+ void setErrorAction( StateAp *state, int ordering, Action *action );
+
+ /* Fill all spaces in a transition list with an error transition. */
+ void fillGaps( StateAp *state );
+
+ /* Similar to setErrorAction, instead gives a state to go to on error. */
+ void setErrorTarget( StateAp *state, StateAp *target, int *orderings,
+ Action **actions, int nActs );
+
+ /* Set actions to execute. */
+ void startFsmAction( int ordering, Action *action );
+ void allTransAction( int ordering, Action *action );
+ void finishFsmAction( int ordering, Action *action );
+ void leaveFsmAction( int ordering, Action *action );
+ void longMatchAction( int ordering, LongestMatchPart *lmPart );
+
+ /* Set conditions. */
+ CondSpace *addCondSpace( const CondSet &condSet );
+
+ void findEmbedExpansions( ExpansionList &expansionList,
+ StateAp *destState, Action *condAction );
+ void embedCondition( MergeData &md, StateAp *state, Action *condAction );
+ void embedCondition( StateAp *state, Action *condAction );
+
+ void startFsmCondition( Action *condAction );
+ void allTransCondition( Action *condAction );
+ void leaveFsmCondition( Action *condAction );
+
+ /* Set error actions to execute. */
+ void startErrorAction( int ordering, Action *action, int transferPoint );
+ void allErrorAction( int ordering, Action *action, int transferPoint );
+ void finalErrorAction( int ordering, Action *action, int transferPoint );
+ void notStartErrorAction( int ordering, Action *action, int transferPoint );
+ void notFinalErrorAction( int ordering, Action *action, int transferPoint );
+ void middleErrorAction( int ordering, Action *action, int transferPoint );
+
+ /* Set EOF actions. */
+ void startEOFAction( int ordering, Action *action );
+ void allEOFAction( int ordering, Action *action );
+ void finalEOFAction( int ordering, Action *action );
+ void notStartEOFAction( int ordering, Action *action );
+ void notFinalEOFAction( int ordering, Action *action );
+ void middleEOFAction( int ordering, Action *action );
+
+ /* Set To State actions. */
+ void startToStateAction( int ordering, Action *action );
+ void allToStateAction( int ordering, Action *action );
+ void finalToStateAction( int ordering, Action *action );
+ void notStartToStateAction( int ordering, Action *action );
+ void notFinalToStateAction( int ordering, Action *action );
+ void middleToStateAction( int ordering, Action *action );
+
+ /* Set From State actions. */
+ void startFromStateAction( int ordering, Action *action );
+ void allFromStateAction( int ordering, Action *action );
+ void finalFromStateAction( int ordering, Action *action );
+ void notStartFromStateAction( int ordering, Action *action );
+ void notFinalFromStateAction( int ordering, Action *action );
+ void middleFromStateAction( int ordering, Action *action );
+
+ /* Shift the action ordering of the start transitions to start at
+ * fromOrder and increase in units of 1. Useful before kleene star
+ * operation. */
+ int shiftStartActionOrder( int fromOrder );
+
+ /* Clear all priorities from the fsm so they won't affect minimization
+ * of the final fsm. */
+ void clearAllPriorities();
+
+ /* Zero out all the function keys. */
+ void nullActionKeys();
+
+ /* Walk the list of states and verify state properties. */
+ void verifyStates();
+
+ /* Misfit Accounting. Are misfits put on a separate list? */
+ void setMisfitAccounting( bool val )
+ { misfitAccounting = val; }
+
+ /* Set and Unset a state as final. */
+ void setFinState( StateAp *state );
+ void unsetFinState( StateAp *state );
+
+ void setStartState( StateAp *state );
+ void unsetStartState( );
+
+ /* Set and unset a state as an entry point. */
+ void setEntry( int id, StateAp *state );
+ void changeEntry( int id, StateAp *to, StateAp *from );
+ void unsetEntry( int id, StateAp *state );
+ void unsetEntry( int id );
+ void unsetAllEntryPoints();
+
+ /* Epsilon transitions. */
+ void epsilonTrans( int id );
+ void shadowReadWriteStates( MergeData &md );
+
+ /*
+ * Basic attaching and detaching.
+ */
+
+ /* Common to attaching/detaching list and default. */
+ void attachToInList( StateAp *from, StateAp *to, TransAp *&head, TransAp *trans );
+ void detachFromInList( StateAp *from, StateAp *to, TransAp *&head, TransAp *trans );
+
+ /* Attach with a new transition. */
+ TransAp *attachNewTrans( StateAp *from, StateAp *to,
+ Key onChar1, Key onChar2 );
+
+ /* Attach with an existing transition that is already in an out list. */
+ void attachTrans( StateAp *from, StateAp *to, TransAp *trans );
+
+ /* Redirect a transition away from error and towards some state. */
+ void redirectErrorTrans( StateAp *from, StateAp *to, TransAp *trans );
+
+ /* Detach a transition from a target state. */
+ void detachTrans( StateAp *from, StateAp *to, TransAp *trans );
+
+ /* Detach a state from the graph. */
+ void detachState( StateAp *state );
+
+ /*
+ * NFA to DFA conversion routines.
+ */
+
+ /* Duplicate a transition that will drop in to a free spot. */
+ TransAp *dupTrans( StateAp *from, TransAp *srcTrans );
+
+ /* In crossing, two transitions both go to real states. */
+ TransAp *fsmAttachStates( MergeData &md, StateAp *from,
+ TransAp *destTrans, TransAp *srcTrans );
+
+ /* Two transitions are to be crossed, handle the possibility of either
+ * going to the error state. */
+ TransAp *mergeTrans( MergeData &md, StateAp *from,
+ TransAp *destTrans, TransAp *srcTrans );
+
+ /* Compare to determine the relative priorities of two transition tables. */
+ int comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 );
+
+ /* Cross a src transition with one that is already occupying a spot. */
+ TransAp *crossTransitions( MergeData &md, StateAp *from,
+ TransAp *destTrans, TransAp *srcTrans );
+
+ void outTransCopy( MergeData &md, StateAp *dest, TransAp *srcList );
+
+ void doRemove( MergeData &md, StateAp *destState, ExpansionList &expList1 );
+ void doExpand( MergeData &md, StateAp *destState, ExpansionList &expList1 );
+ void findCondExpInTrans( ExpansionList &expansionList, StateAp *state,
+ Key lowKey, Key highKey, CondSpace *fromCondSpace, CondSpace *toCondSpace,
+ long destVals, LongVect &toValsList );
+ void findTransExpansions( ExpansionList &expansionList,
+ StateAp *destState, StateAp *srcState );
+ void findCondExpansions( ExpansionList &expansionList,
+ StateAp *destState, StateAp *srcState );
+ void mergeStateConds( StateAp *destState, StateAp *srcState );
+
+ /* Merge a set of states into newState. */
+ void mergeStates( MergeData &md, StateAp *destState,
+ StateAp **srcStates, int numSrc );
+ void mergeStatesLeaving( MergeData &md, StateAp *destState, StateAp *srcState );
+ void mergeStates( MergeData &md, StateAp *destState, StateAp *srcState );
+
+ /* Make all states that are combinations of other states and that
+ * have not yet had their out transitions filled in. This will
+ * empty out stateDict and stFil. */
+ void fillInStates( MergeData &md );
+
+ /*
+ * Transition Comparison.
+ */
+
+ /* Compare transition data. Either of the pointers may be null. */
+ static inline int compareDataPtr( TransAp *trans1, TransAp *trans2 );
+
+ /* Compare target state and transition data. Either pointer may be null. */
+ static inline int compareFullPtr( TransAp *trans1, TransAp *trans2 );
+
+ /* Compare target partitions. Either pointer may be null. */
+ static inline int comparePartPtr( TransAp *trans1, TransAp *trans2 );
+
+ /* Check marked status of target states. Either pointer may be null. */
+ static inline bool shouldMarkPtr( MarkIndex &markIndex,
+ TransAp *trans1, TransAp *trans2 );
+
+ /*
+ * Callbacks.
+ */
+
+ /* Compare priority and function table of transitions. */
+ static int compareTransData( TransAp *trans1, TransAp *trans2 );
+
+ /* Add in the properties of srcTrans into this. */
+ void addInTrans( TransAp *destTrans, TransAp *srcTrans );
+
+ /* Compare states on data stored in the states. */
+ static int compareStateData( const StateAp *state1, const StateAp *state2 );
+
+ /* Out transition data. */
+ void clearOutData( StateAp *state );
+ bool hasOutData( StateAp *state );
+ void transferOutData( StateAp *destState, StateAp *srcState );
+
+ /*
+ * Allocation.
+ */
+
+ /* New up a state and add it to the graph. */
+ StateAp *addState();
+
+ /*
+ * Building basic machines
+ */
+
+ void concatFsm( Key c );
+ void concatFsm( Key *str, int len );
+ void concatFsmCI( Key *str, int len );
+ void orFsm( Key *set, int len );
+ void rangeFsm( Key low, Key high );
+ void rangeStarFsm( Key low, Key high );
+ void emptyFsm( );
+ void lambdaFsm( );
+
+ /*
+ * Fsm operators.
+ */
+
+ void starOp( );
+ void repeatOp( int times );
+ void optionalRepeatOp( int times );
+ void concatOp( FsmAp *other );
+ void unionOp( FsmAp *other );
+ void intersectOp( FsmAp *other );
+ void subtractOp( FsmAp *other );
+ void epsilonOp();
+ void joinOp( int startId, int finalId, FsmAp **others, int numOthers );
+ void globOp( FsmAp **others, int numOthers );
+ void deterministicEntry();
+
+ /*
+ * Operator workers
+ */
+
+ /* Determine if there are any entry points into a start state other than
+ * the start state. */
+ bool isStartStateIsolated();
+
+ /* Make a new start state that has no entry points. Will not change the
+ * identity of the fsm. */
+ void isolateStartState();
+
+ /* Workers for resolving epsilon transitions. */
+ bool inEptVect( EptVect *eptVect, StateAp *targ );
+ void epsilonFillEptVectFrom( StateAp *root, StateAp *from, bool parentLeaving );
+ void resolveEpsilonTrans( MergeData &md );
+
+ /* Workers for concatenation and union. */
+ void doConcat( FsmAp *other, StateSet *fromStates, bool optional );
+ void doOr( FsmAp *other );
+
+ /*
+ * Final states
+ */
+
+ /* Unset any final states that are no longer to be final
+ * due to final bits. */
+ void unsetIncompleteFinals();
+ void unsetKilledFinals();
+
+ /* Bring in other's entry points. Assumes others states are going to be
+ * copied into this machine. */
+ void copyInEntryPoints( FsmAp *other );
+
+ /* Ordering states. */
+ void depthFirstOrdering( StateAp *state );
+ void depthFirstOrdering();
+ void sortStatesByFinal();
+
+ /* Set sequential state numbers starting at base. */
+ void setStateNumbers( int base );
+
+ /* Unset all final states. */
+ void unsetAllFinStates();
+
+ /* Set the bits of final states and clear the bits of non final states. */
+ void setFinBits( int finStateBits );
+
+ /*
+ * Self-consistency checks.
+ */
+
+ /* Run a sanity check on the machine. */
+ void verifyIntegrity();
+
+ /* Verify that there are no unreachable states, or dead end states. */
+ void verifyReachability();
+ void verifyNoDeadEndStates();
+
+ /*
+ * Path pruning
+ */
+
+ /* Mark all states reachable from state, presumably walking transitions in reverse -- TODO confirm. */
+ void markReachableFromHereReverse( StateAp *state );
+
+ /* Mark all states reachable from state. */
+ void markReachableFromHere( StateAp *state );
+ void markReachableFromHereStopFinal( StateAp *state );
+
+ /* Removes dead end states. NOTE(review): presumably states from which no
+ * final state can be reached -- confirm against the definition. */
+ void removeDeadEndStates();
+
+ /* Removes states that cannot be reached by any path in the fsm and are
+ * thus wasted silicon. */
+ void removeUnreachableStates();
+
+ /* Remove error actions from states on which the error transition will
+ * never be taken. */
+ bool outListCovers( StateAp *state );
+ bool anyErrorRange( StateAp *state );
+
+ /* Remove states that are on the misfit list. */
+ void removeMisfits();
+
+ /*
+ * FSM Minimization
+ */
+
+ /* Minimization by partitioning. */
+ void minimizePartition1();
+ void minimizePartition2();
+
+ /* Minimize the final state Machine. The result is the minimal fsm. Slow
+ * but stable, correct minimization. Uses n^2 space (lookout) and average
+ * n^2 time. Worst case n^3 time, but that is a very rare case. */
+ void minimizeStable();
+
+ /* Minimize the final state machine. Does not find the minimal fsm, but a
+ * pretty good approximation. Does not use any extra space. Average n^2
+ * time. Worst case n^3 time, but that is a very rare case. */
+ void minimizeApproximate();
+
+ /* This is the worker for the minimize approximate solution. It merges
+ * states that have identical out transitions. */
+ bool minimizeRound( );
+
+ /* Given an initial partitioning of states, split partitions that have out trans
+ * to differing partitions. */
+ int partitionRound( StateAp **statePtrs, MinPartition *parts, int numParts );
+
+ /* Split partitions that have a transition to a previously split partition, until
+ * there are no more partitions to split. */
+ int splitCandidates( StateAp **statePtrs, MinPartition *parts, int numParts );
+
+ /* Fuse together states in the same partition. */
+ void fusePartitions( MinPartition *parts, int numParts );
+
+ /* Mark pairs where out final stateness differs, out trans data differs,
+ * trans pairs go to a marked pair or trans data differs. Should get
+ * a lot of pairs. */
+ void initialMarkRound( MarkIndex &markIndex );
+
+ /* One marking round on all state pairs. Considers if trans pairs go
+ * to a marked state only. Returns whether or not a pair was marked. */
+ bool markRound( MarkIndex &markIndex );
+
+ /* Move the in trans into src into dest. */
+ void inTransMove(StateAp *dest, StateAp *src);
+
+ /* Make state src and dest the same state. */
+ void fuseEquivStates(StateAp *dest, StateAp *src);
+
+ /* Find any states that didn't get marked by the marking algorithm and
+ * merge them into the primary states of their equivalence class. */
+ void fuseUnmarkedPairs( MarkIndex &markIndex );
+
+ /* Merge neighboring transitions that go to the same state and have the same
+ * transition data. */
+ void compressTransitions();
+
+ /* Returns true if there is a transition (either explicit or by a gap) to
+ * the error state. */
+ bool checkErrTrans( StateAp *state, TransAp *trans );
+ bool checkErrTransFinish( StateAp *state );
+ bool hasErrorTrans();
+
+ /* Check if a machine defines a single character. This is useful in
+ * validating ranges and machines to export. */
+ bool checkSingleCharMachine( );
+};
+
+
+#endif /* _FSMGRAPH_H */
diff --git a/contrib/tools/ragel5/ragel/fsmmin.cpp b/contrib/tools/ragel5/ragel/fsmmin.cpp
new file mode 100644
index 0000000000..046d11afa6
--- /dev/null
+++ b/contrib/tools/ragel5/ragel/fsmmin.cpp
@@ -0,0 +1,732 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "fsmgraph.h"
+#include "mergesort.h"
+
+int FsmAp::partitionRound( StateAp **statePtrs, MinPartition *parts, int numParts )
+{ /* One splitting pass over parts[0..numParts); returns the partition count after the pass. */
+ /* Need a mergesort object and a single partition compare. */
+ MergeSort<StateAp*, PartitionCompare> mergeSort;
+ PartitionCompare partCompare;
+
+ /* For each partition. */
+ for ( int p = 0; p < numParts; p++ ) {
+ /* Fill the pointer array with the states in the partition. */
+ StateList::Iter state = parts[p].list;
+ for ( int s = 0; state.lte(); state++, s++ )
+ statePtrs[s] = state;
+
+ /* Sort the states using the partitioning compare. */
+ int numStates = parts[p].list.length();
+ mergeSort.sort( statePtrs, numStates );
+
+ /* Assign the states into partitions based on the results of the sort. */
+ int destPart = p, firstNewPart = numParts;
+ for ( int s = 1; s < numStates; s++ ) { /* statePtrs[0] always stays in partition p. */
+ /* If this state differs from the last then move to the next partition. */
+ if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) {
+ /* The new partition is the next avail spot. */
+ destPart = numParts;
+ numParts += 1;
+ }
+
+ /* If the state is not staying in the first partition, then
+ * transfer it to its destination partition. */
+ if ( destPart != p ) {
+ StateAp *state = parts[p].list.detach( statePtrs[s] );
+ parts[destPart].list.append( state );
+ }
+ }
+
+ /* Fix the partition pointer for all the states that got moved to a new
+ * partition. This must be done after the states are transferred so the
+ * result of the sort is not altered. */
+ for ( int newPart = firstNewPart; newPart < numParts; newPart++ ) {
+ StateList::Iter state = parts[newPart].list;
+ for ( ; state.lte(); state++ )
+ state->alg.partition = &parts[newPart];
+ }
+ }
+
+ return numParts;
+}
+
+/**
+ * \brief Minimize by partitioning version 1.
+ *
+ * Repeatedly tries to split partitions until all partitions are unsplittable.
+ * Produces the most minimal FSM possible.
+ */
+void FsmAp::minimizePartition1()
+{
+ /* Need one mergesort object and partition compares. */
+ MergeSort<StateAp*, InitPartitionCompare> mergeSort;
+ InitPartitionCompare initPartCompare;
+
+ /* Nothing to do if there are no states. */
+ if ( stateList.length() == 0 )
+ return;
+
+ /*
+ * First thing is to partition the states by final state status and
+ * transition functions. This gives us an initial partitioning to work
+ * with.
+ */
+
+ /* Make an array of pointers to states. */
+ int numStates = stateList.length();
+ StateAp** statePtrs = new StateAp*[numStates];
+
+ /* Fill up an array of pointers to the states for easy sorting. */
+ StateList::Iter state = stateList;
+ for ( int s = 0; state.lte(); state++, s++ )
+ statePtrs[s] = state;
+
+ /* Sort the states using the array of states. */
+ mergeSort.sort( statePtrs, numStates );
+
+ /* An array of lists of states is used to partition the states. */
+ MinPartition *parts = new MinPartition[numStates]; /* One slot per state: room for every state to end up alone. */
+
+ /* Assign the states into partitions. */
+ int destPart = 0;
+ for ( int s = 0; s < numStates; s++ ) {
+ /* If this state differs from the last then move to the next partition. */
+ if ( s > 0 && initPartCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) {
+ /* Move to the next partition. */
+ destPart += 1;
+ }
+
+ /* Put the state into its partition. */
+ statePtrs[s]->alg.partition = &parts[destPart];
+ parts[destPart].list.append( statePtrs[s] );
+ }
+
+ /* We just moved all the states from the main list into partitions without
+ * taking them off the main list. So clean up the main list now. */
+ stateList.abandon();
+
+ /* Split partitions. */
+ int numParts = destPart + 1;
+ while ( true ) {
+ /* Test all partitions for splitting. */
+ int newNum = partitionRound( statePtrs, parts, numParts );
+
+ /* When no partitions can be split, stop. */
+ if ( newNum == numParts )
+ break;
+
+ numParts = newNum;
+ }
+
+ /* Fuse states in the same partition. The states will end up back on the
+ * main list. */
+ fusePartitions( parts, numParts );
+
+ /* Cleanup. */
+ delete[] statePtrs;
+ delete[] parts;
+}
+
+/* Split partitions that need splitting, decide which partitions might need
+ * to be split as a result, continue until there are no more that might need
+ * to be split. */
+int FsmAp::splitCandidates( StateAp **statePtrs, MinPartition *parts, int numParts )
+{
+ /* Need a mergesort and a partition compare. */
+ MergeSort<StateAp*, PartitionCompare> mergeSort;
+ PartitionCompare partCompare;
+
+ /* The lists of unsplittable (partList) and splittable partitions.
+ * Only partitions in the splittable list are checked for needing splitting. */
+ PartitionList partList, splittable;
+
+ /* Initially, all partitions are born from a split (the initial
+ * partitioning) and can cause other partitions to be split. So any
+ * partition with a state with a transition out to another partition is a
+ * candidate for splitting. This will make every partition except possibly
+ * partitions of final states split candidates. */
+ for ( int p = 0; p < numParts; p++ ) {
+ /* Assume not active. */
+ parts[p].active = false;
+
+ /* Look for a trans out of any state in the partition. */
+ for ( StateList::Iter state = parts[p].list; state.lte(); state++ ) {
+ /* If there is at least one transition out to another state then
+ * the partition becomes splittable. */
+ if ( state->outList.length() > 0 ) {
+ parts[p].active = true;
+ break;
+ }
+ }
+
+ /* If it was found active then it goes on the splittable list. */
+ if ( parts[p].active )
+ splittable.append( &parts[p] );
+ else
+ partList.append( &parts[p] );
+ }
+
+ /* While there are partitions that are splittable, pull one off and try
+ * to split it. If it splits, determine which partitions may now be split
+ * as a result of the newly split partition. */
+ while ( splittable.length() > 0 ) {
+ MinPartition *partition = splittable.detachFirst();
+
+ /* Fill the pointer array with the states in the partition. */
+ StateList::Iter state = partition->list;
+ for ( int s = 0; state.lte(); state++, s++ )
+ statePtrs[s] = state;
+
+ /* Sort the states using the partitioning compare. */
+ int numStates = partition->list.length();
+ mergeSort.sort( statePtrs, numStates );
+
+ /* Assign the states into partitions based on the results of the sort. */
+ MinPartition *destPart = partition;
+ int firstNewPart = numParts;
+ for ( int s = 1; s < numStates; s++ ) {
+ /* If this state differs from the last then move to the next partition. */
+ if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) {
+ /* The new partition is the next avail spot. */
+ destPart = &parts[numParts];
+ numParts += 1;
+ }
+
+ /* If the state is not staying in the first partition, then
+ * transfer it to its destination partition. */
+ if ( destPart != partition ) {
+ StateAp *state = partition->list.detach( statePtrs[s] );
+ destPart->list.append( state );
+ }
+ }
+
+ /* Fix the partition pointer for all the states that got moved to a new
+ * partition. This must be done after the states are transferred so the
+ * result of the sort is not altered. */
+ int newPart;
+ for ( newPart = firstNewPart; newPart < numParts; newPart++ ) {
+ StateList::Iter state = parts[newPart].list;
+ for ( ; state.lte(); state++ )
+ state->alg.partition = &parts[newPart];
+ }
+
+ /* Put the partition we just split and any new partitions that came out
+ * of the split onto the inactive list. */
+ partition->active = false;
+ partList.append( partition );
+ for ( newPart = firstNewPart; newPart < numParts; newPart++ ) {
+ parts[newPart].active = false;
+ partList.append( &parts[newPart] );
+ }
+
+ if ( destPart == partition ) /* Nothing was split off, so there are no new candidates to find. */
+ continue;
+
+ /* Now determine which partitions are splittable as a result of
+ * splitting partition by walking the in lists of the states in
+ * partitions that got split. Partition is the faked first item in the
+ * loop. */
+ MinPartition *causalPart = partition;
+ newPart = firstNewPart - 1;
+ while ( newPart < numParts ) {
+ /* Loop all states in the causal partition. */
+ StateList::Iter state = causalPart->list;
+ for ( ; state.lte(); state++ ) {
+ /* Walk all transitions into the state and put the partition
+ * that the from state is in onto the splittable list. */
+ for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) {
+ MinPartition *fromPart = trans->fromState->alg.partition;
+ if ( ! fromPart->active ) {
+ fromPart->active = true;
+ partList.detach( fromPart );
+ splittable.append( fromPart );
+ }
+ }
+ }
+
+ newPart += 1;
+ causalPart = &parts[newPart]; /* On the last pass this is &parts[numParts]; the loop condition fails before it is used. */
+ }
+ }
+ return numParts;
+}
+
+
+/**
+ * \brief Minimize by partitioning version 2 (best alg).
+ *
+ * Repeatedly tries to split partitions that may be splittable until there are
+ * no more partitions that might possibly need splitting. Runs faster than
+ * version 1. Produces the most minimal fsm possible.
+ *
+ * Outline: sort the states with InitPartitionCompare to build the initial
+ * partitions, refine them with splitCandidates(), then collapse every
+ * partition with fusePartitions(), which puts the surviving states back on
+ * the main state list.
+ */
+void FsmAp::minimizePartition2()
+{
+ /* Need a mergesort and an initial partition compare. */
+ MergeSort<StateAp*, InitPartitionCompare> mergeSort;
+ InitPartitionCompare initPartCompare;
+
+ /* Nothing to do if there are no states. */
+ if ( stateList.length() == 0 )
+ return;
+
+ /*
+ * First thing is to partition the states by final state status and
+ * transition functions. This gives us an initial partitioning to work
+ * with.
+ */
+
+ /* Make an array of pointers to states. */
+ int numStates = stateList.length();
+ StateAp** statePtrs = new StateAp*[numStates];
+
+ /* Fill up an array of pointers to the states for easy sorting. */
+ StateList::Iter state = stateList;
+ for ( int s = 0; state.lte(); state++, s++ )
+ statePtrs[s] = state;
+
+ /* Sort the states using the array of states. */
+ mergeSort.sort( statePtrs, numStates );
+
+ /* An array of lists of states is used to partition the states. One slot
+ * per state is allocated, so there is room for the case where every state
+ * ends up in its own partition. */
+ MinPartition *parts = new MinPartition[numStates];
+
+ /* Assign the states into partitions. After the sort, equal states are
+ * neighbours, so a new partition starts wherever two neighbours differ. */
+ int destPart = 0;
+ for ( int s = 0; s < numStates; s++ ) {
+ /* If this state differs from the last then move to the next partition. */
+ if ( s > 0 && initPartCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) {
+ /* Move to the next partition. */
+ destPart += 1;
+ }
+
+ /* Put the state into its partition. */
+ statePtrs[s]->alg.partition = &parts[destPart];
+ parts[destPart].list.append( statePtrs[s] );
+ }
+
+ /* We just moved all the states from the main list into partitions without
+ * taking them off the main list. So clean up the main list now. */
+ stateList.abandon();
+
+ /* Split partitions. destPart is the index of the last partition used, so
+ * destPart+1 is the number of initial partitions. */
+ int numParts = splitCandidates( statePtrs, parts, destPart+1 );
+
+ /* Fuse states in the same partition. The states will end up back on the
+ * main list. */
+ fusePartitions( parts, numParts );
+
+ /* Cleanup. */
+ delete[] statePtrs;
+ delete[] parts;
+}
+
+/* Seed the mark index: every unordered pair of distinct states that the
+ * initial partition compare tells apart is marked. */
+void FsmAp::initialMarkRound( MarkIndex &markIndex )
+{
+    /* Comparison deciding whether two states differ on final state status,
+     * out transitions or any transition data. */
+    InitPartitionCompare initPartCompare;
+
+    /* The inner walk restarts at the head of the list and stops just before
+     * reaching the outer state, which yields each unordered pair of distinct
+     * states exactly once. */
+    for ( StateAp *outer = stateList.head; outer != 0; outer = outer->next ) {
+        for ( StateAp *inner = stateList.head; inner != outer; inner = inner->next ) {
+            /* Equal under the initial compare: leave the pair unmarked. */
+            if ( initPartCompare.compare( outer, inner ) == 0 )
+                continue;
+
+            markIndex.markPair( outer->alg.stateNum, inner->alg.stateNum );
+        }
+    }
+}
+
+/* Perform one propagation round over all unordered pairs of distinct states.
+ * A pair that is not yet marked becomes marked when the mark comparison says
+ * so. Returns true if this round marked at least one new pair. */
+bool FsmAp::markRound( MarkIndex &markIndex )
+{
+    MarkCompare markCompare;
+    bool anyMarked = false;
+
+    /* The inner walk stops before reaching the outer state, so every
+     * unordered pair of distinct states is visited once. */
+    for ( StateAp *outer = stateList.head; outer != 0; outer = outer->next ) {
+        for ( StateAp *inner = stateList.head; inner != outer; inner = inner->next ) {
+            /* Already known to be distinct, nothing to decide. */
+            if ( markIndex.isPairMarked( outer->alg.stateNum, inner->alg.stateNum ) )
+                continue;
+
+            if ( !markCompare.shouldMark( markIndex, outer, inner ) )
+                continue;
+
+            markIndex.markPair( outer->alg.stateNum, inner->alg.stateNum );
+            anyMarked = true;
+        }
+    }
+
+    return anyMarked;
+}
+
+
+/**
+ * \brief Minimize by pair marking.
+ *
+ * Decides for each pair of states whether the two are distinct. Uses O(n^2)
+ * memory and should only be used on small graphs. Produces the most minimal
+ * FSM possible.
+ */
+void FsmAp::minimizeStable()
+{
+    /* Assign the state numbers; the mark index is addressed by them. */
+    setStateNumbers( 0 );
+
+    /* Records which pairs have been marked, sized by the state count. */
+    MarkIndex markIndex( stateList.length() );
+
+    /* Start with the pairs where final stateness, out trans, or trans data
+     * differ. */
+    initialMarkRound( markIndex );
+
+    /* Keep running rounds for as long as the previous round marked
+     * something new. */
+    while ( markRound( markIndex ) ) {
+    }
+
+    /* Whatever is still unmarked gets merged. */
+    fuseUnmarkedPairs( markIndex );
+}
+
+/* Run a single round of approximate minimization: sort the states with
+ * ApproxCompare and fuse neighbours in the sorted order that compare equal.
+ * Returns true if at least one state was fused during the round. */
+bool FsmAp::minimizeRound()
+{
+ /* Nothing to do if there are no states. */
+ if ( stateList.length() == 0 )
+ return false;
+
+ /* Need a mergesort on approx compare and an approx compare. */
+ MergeSort<StateAp*, ApproxCompare> mergeSort;
+ ApproxCompare approxCompare;
+
+ /* Fill up an array of pointers to the states. */
+ StateAp **statePtrs = new StateAp*[stateList.length()];
+ StateList::Iter state = stateList;
+ for ( int s = 0; state.lte(); state++, s++ )
+ statePtrs[s] = state;
+
+ bool modified = false;
+
+ /* Sort the list. */
+ mergeSort.sort( statePtrs, stateList.length() );
+
+ /* Walk the list looking for duplicates next to each other,
+ * merge in any duplicates. Note that the loop bound is re-read from
+ * stateList on every iteration and fuseEquivStates() detaches the fused
+ * state from stateList, so the bound shrinks each time a state is
+ * fused. */
+ StateAp **pLast = statePtrs;
+ StateAp **pState = statePtrs + 1;
+ for ( int i = 1; i < stateList.length(); i++, pState++ ) {
+ if ( approxCompare.compare( *pLast, *pState ) == 0 ) {
+ /* Last and pState are the same, so fuse together. Move forward
+ * with pState but not with pLast, so that any further states that
+ * compare equal are fused into the same pLast. The state behind
+ * pState is deleted by the fuse and is not looked at again. */
+ fuseEquivStates( *pLast, *pState );
+ modified = true;
+ }
+ else {
+ /* Last and this are different, do not set to merge them. Move
+ * pLast to the current (it may be way behind from merging many
+ * states) and pState forward one to consider the next pair. */
+ pLast = pState;
+ }
+ }
+ delete[] statePtrs;
+ return modified;
+}
+
+/**
+ * \brief Minimize by an approximation.
+ *
+ * Repeatedly tries to find states with transitions out to the same set of
+ * states on the same set of keys until no more identical states can be found.
+ * Does not produce the most minimal FSM possible.
+ */
+void FsmAp::minimizeApproximate()
+{
+    /* Run rounds back to back; stop after the first round that did not
+     * manage to compact any states. */
+    while ( minimizeRound() ) {
+    }
+}
+
+
+/* Remove the states that cannot be reached from the start state or from any
+ * entry point. Reachable states are marked first, then every unmarked state
+ * is deleted and the marks on the survivors are cleared again. */
+void FsmAp::removeUnreachableStates()
+{
+    /* Requires misfit accounting to be off and the misfit list to be
+     * empty. */
+    assert( !misfitAccounting && misfitList.length() == 0 );
+
+    /* Mark from the start state and from each of the entry points. */
+    markReachableFromHere( startState );
+    for ( EntryMap::Iter en = entryPoints; en.lte(); en++ )
+        markReachableFromHere( en->value );
+
+    /* Sweep the state list. The successor is saved up front because the
+     * current state may be deleted. */
+    StateAp *following = 0;
+    for ( StateAp *cur = stateList.head; cur != 0; cur = following ) {
+        following = cur->next;
+
+        if ( ( cur->stateBits & SB_ISMARKED ) == 0 ) {
+            /* Never marked: take it out of the graph and the list. */
+            detachState( cur );
+            stateList.detach( cur );
+            delete cur;
+        }
+        else {
+            /* Marked: keep the state, drop the mark. */
+            cur->stateBits &= ~ SB_ISMARKED;
+        }
+    }
+}
+
+/* Report whether the out transitions of a state cover the whole key space:
+ * the first range must not start above the minimum key, no range may leave a
+ * gap to the range before it, and the last range must reach the maximum
+ * key. */
+bool FsmAp::outListCovers( StateAp *state )
+{
+    /* An empty list covers nothing. */
+    if ( state->outList.length() == 0 )
+        return false;
+
+    /* Keys below the first range would be left uncovered. */
+    TransList::Iter tr = state->outList.first();
+    if ( keyOps->minKey < tr->lowKey )
+        return false;
+
+    /* From the second range on, compare the key just below the range's low
+     * end against the previous range's high end to detect a gap. */
+    for ( tr.increment(); tr.lte(); tr++ ) {
+        Key belowLow = tr->lowKey;
+        belowLow.decrement();
+        if ( tr->prev->highKey < belowLow )
+            return false;
+    }
+
+    /* Covered only if the final range extends to the upper bound. */
+    tr = state->outList.last();
+    return !( tr->highKey < keyOps->maxKey );
+}
+
+/* Remove states that do not lead to a final state. Marks states by walking
+ * the graph in reverse from every final state, then deletes the states that
+ * were not marked and clears the marks on the rest. */
+void FsmAp::removeDeadEndStates()
+{
+    /* Requires misfit accounting to be off and the misfit list to be
+     * empty. */
+    assert( !misfitAccounting && misfitList.length() == 0 );
+
+    /* Mark everything that has a path to some final state. */
+    StateAp **finals = finStateSet.data;
+    int numFinals = finStateSet.length();
+    for ( int i = 0; i < numFinals; i++ )
+        markReachableFromHereReverse( finals[i] );
+
+    /* The start state is kept even if the machine accepts nothing. It is
+     * marked only after the reverse walks so that the mark does not cause
+     * the start state's in transitions to be skipped when the traversal
+     * visits the start state. */
+    startState->stateBits |= SB_ISMARKED;
+
+    /* Sweep the state list. The successor is saved up front because the
+     * current state may be deleted. */
+    StateAp *following = 0;
+    for ( StateAp *cur = stateList.head; cur != 0; cur = following ) {
+        following = cur->next;
+
+        if ( ( cur->stateBits & SB_ISMARKED ) == 0 ) {
+            /* Never marked: take it out of the graph and the list. */
+            detachState( cur );
+            stateList.detach( cur );
+            delete cur;
+        }
+        else {
+            /* Marked: keep the state, drop the mark. */
+            cur->stateBits &= ~ SB_ISMARKED;
+        }
+    }
+}
+
+/* Drain the misfit list, deleting every state on it. Misfit accounting
+ * should be on when this is called: detaching one state will likely cause
+ * another misfit to be collected, and that one is then removed by a later
+ * pass of the loop. */
+void FsmAp::removeMisfits()
+{
+    while ( misfitList.length() > 0 ) {
+        /* Always work on whatever is currently first. */
+        StateAp *misfit = misfitList.head;
+
+        /* Cut the state out of the graph. */
+        detachState( misfit );
+
+        /* Detaching can only remove in transitions, so a state that was on
+         * the misfit list before must still be on it now. */
+        misfitList.detach( misfit );
+        delete misfit;
+    }
+}
+
+/* Fuse src into dest because they have been deemed equivalent states.
+ * The transitions going into src are moved to go into dest, then src is
+ * detached from the graph, removed from the main state list and deleted.
+ * src must therefore be on stateList when this is called. */
+void FsmAp::fuseEquivStates( StateAp *dest, StateAp *src )
+{
+ /* This would get ugly. */
+ assert( dest != src );
+
+ /* Src is a duplicate of dest. Redirect its in transitions to dest. */
+ inTransMove( dest, src );
+
+ /* Src is no longer needed: unlink it and free it. */
+ detachState( src );
+ stateList.detach( src );
+ delete src;
+}
+
+/* Merge every state into the first earlier state on the list that it forms
+ * an unmarked (equivalent) pair with. */
+void FsmAp::fuseUnmarkedPairs( MarkIndex &markIndex )
+{
+ StateAp *p = stateList.head, *nextP, *q;
+
+ /* Definition: The primary state of an equivalence class is the first state
+ * encountered that belongs to the equivalence class. All equivalence
+ * classes have a primary state, including equivalence classes with one
+ * state in them. */
+
+ /* For each unmarked pair merge p into q and delete p. q is always the
+ * primary state of its equivalence class. We wouldn't have landed on it
+ * here if it were not, because it would have been deleted.
+ *
+ * Proof that q is the primary state of its equivalence class: Assume q
+ * is not the primary state of its equivalence class, then it would be
+ * merged into some state that came before it and thus p would be
+ * equivalent to that state. But q is the first state that p is equivalent
+ * to so we have a contradiction. */
+
+ /* Walk all unordered pairs of (p, q) where p != q.
+ * The second depth of the walk stops before reaching p. This
+ * gives us all unordered pairs of states (p, q) where p != q. */
+ while ( p != 0 ) {
+ /* Save the successor now, p may be deleted by the fuse below. */
+ nextP = p->next;
+
+ q = stateList.head;
+ while ( q != p ) {
+ /* An unmarked pair means p and q are equivalent: fuse p into q.
+ * p is gone after that, so stop looking at further q. */
+ if ( ! markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) ) {
+ fuseEquivStates( q, p );
+ break;
+ }
+ q = q->next;
+ }
+ p = nextP;
+ }
+}
+
+/* Collapse each of the first numParts partitions in parts down to a single
+ * state. The first state of a partition survives and ends up on the main
+ * state list; all other states of the partition are fused into it. Every
+ * partition is expected to hold at least one state. */
+void FsmAp::fusePartitions( MinPartition *parts, int numParts )
+{
+ /* For each partition, fuse state 2, 3, ... into state 1. */
+ for ( int p = 0; p < numParts; p++ ) {
+ /* Assume that there will always be at least one state. */
+ StateAp *first = parts[p].list.head, *toFuse = first->next;
+
+ /* Put the first state back onto the main state list. Don't bother
+ * removing it from the partition list first. */
+ stateList.append( first );
+
+ /* Fuse the rest of the states into the first. */
+ while ( toFuse != 0 ) {
+ /* Save the next. We will trash it before it is needed. */
+ StateAp *next = toFuse->next;
+
+ /* Put the state to be fused into the first back onto the main
+ * list before it is fused. The state needs to be on the main
+ * list for the detach from the graph to work. Don't bother
+ * removing the state from the partition list first. We need not
+ * maintain it. */
+ stateList.append( toFuse );
+
+ /* Now fuse to the first. */
+ fuseEquivStates( first, toFuse );
+
+ /* Go to the next that we saved before trashing the next pointer. */
+ toFuse = next;
+ }
+
+ /* We transferred the states from the partition list into the main list
+ * without removing the states from the partition list first. Clean it
+ * up. */
+ parts[p].list.abandon();
+ }
+}
+
+
+/* Merge neighboring transitions that go to the same state and have the same
+ * transition data. Two neighbours are merged when the second starts at the
+ * key directly after the first one's high key, both have the same target
+ * state and their action tables compare equal. */
+void FsmAp::compressTransitions()
+{
+ for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+ /* A single transition has no neighbour to merge with. */
+ if ( st->outList.length() > 1 ) {
+ for ( TransList::Iter trans = st->outList, next = trans.next(); next.lte(); ) {
+ /* The key just below next's range, for the adjacency test. */
+ Key nextLow = next->lowKey;
+ nextLow.decrement();
+ if ( trans->highKey == nextLow && trans->toState == next->toState &&
+ CmpActionTable::compare( trans->actionTable, next->actionTable ) == 0 )
+ {
+ /* Extend trans over next's range, then unlink and free next.
+ * trans stays where it is so that it can also absorb the
+ * transition that follows. */
+ trans->highKey = next->highKey;
+ st->outList.detach( next );
+ detachTrans( next->fromState, next->toState, next );
+ delete next;
+ next = trans.next();
+ }
+ else {
+ /* No merge possible, advance both iterators. */
+ trans.increment();
+ next.increment();
+ }
+ }
+ }
+ }
+}
diff --git a/contrib/tools/ragel5/ragel/fsmstate.cpp b/contrib/tools/ragel5/ragel/fsmstate.cpp
new file mode 100644
index 0000000000..4322c1060f
--- /dev/null
+++ b/contrib/tools/ragel5/ragel/fsmstate.cpp
@@ -0,0 +1,463 @@
+/*
+ * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <string.h>
+#include <assert.h>
+#include "fsmgraph.h"
+
+#include <iostream>
+using namespace std;
+
+/* Construct a mark index for a specified number of states. Allocates an
+ * array of states^2 flags, all initially unmarked.
+ *
+ * states: number of states to index, must not be negative. */
+MarkIndex::MarkIndex( int states ) : numStates(states)
+{
+    assert( states >= 0 );
+
+    /* Total pairs is states^2. Actually only use half of these, but we
+     * allocate them all to make indexing into the array easier. The product
+     * is computed in size_t because states * states overflows int once
+     * states exceeds 46340. */
+    size_t total = (size_t)states * (size_t)states;
+
+    /* Allocate the flags and clear them so every pair starts out unmarked. */
+    array = new bool[total];
+    memset( array, 0, sizeof(bool) * total );
+}
+
+/* Free the flag array that was allocated with new[] in the constructor. */
+MarkIndex::~MarkIndex()
+{
+ delete[] array;
+}
+
+/* Mark a pair of states. States are specified by their number and the
+ * order in which they are given does not matter: the larger number selects
+ * the row and the smaller one the column of the flag that gets set. */
+void MarkIndex::markPair(int state1, int state2)
+{
+    const int row = ( state1 >= state2 ) ? state1 : state2;
+    const int col = ( state1 >= state2 ) ? state2 : state1;
+
+    /* Index in size_t, row * numStates can overflow int for large graphs. */
+    size_t pos = (size_t)row * (size_t)numStates + (size_t)col;
+
+    array[pos] = true;
+}
+
+/* Returns true if the pair of states is marked. Returns false otherwise.
+ * Ordering of states given does not matter: the larger number selects the
+ * row and the smaller one the column of the flag that is read. */
+bool MarkIndex::isPairMarked(int state1, int state2)
+{
+    const int row = ( state1 >= state2 ) ? state1 : state2;
+    const int col = ( state1 >= state2 ) ? state2 : state1;
+
+    /* Index in size_t, row * numStates can overflow int for large graphs. */
+    size_t pos = (size_t)row * (size_t)numStates + (size_t)col;
+
+    return array[pos];
+}
+
+/* Create a new fsm state. The state has no out transitions or in
+ * transitions, no entry ids or epsilon transitions, no conditions, no state
+ * bits set and no priority or action data. */
+StateAp::StateAp()
+:
+ /* No out or in transitions. */
+ outList(),
+ inList(),
+
+ /* No entry points, or epsilon trans. */
+ entryIds(),
+ epsilonTrans(),
+
+ /* Conditions. */
+ stateCondList(),
+
+ /* No transitions in from other states. */
+ foreignInTrans(0),
+
+ /* Only used during merging. Normally null. */
+ stateDictEl(0),
+ eptVect(0),
+
+ /* No state identification bits. */
+ stateBits(0),
+
+ /* No Priority data. */
+ outPriorTable(),
+
+ /* No Action data. */
+ toStateActionTable(),
+ fromStateActionTable(),
+ outActionTable(),
+ outCondSet(),
+ errActionTable(),
+ eofActionTable()
+{
+}
+
+/* Copy a state. The state data (entry ids, epsilon transitions, conditions,
+ * state bits, priority and action tables) is copied from other. The in list
+ * starts out empty and foreignInTrans is reset to zero. The out transitions
+ * are duplicated, but each duplicate still targets the state the original
+ * transition targets; fixing that up is left to the caller (the FsmAp copy
+ * constructor, according to the original note here). */
+StateAp::StateAp(const StateAp &other)
+:
+ /* All lists are cleared. They will be filled in when the
+ * individual transitions are duplicated and attached. */
+ outList(),
+ inList(),
+
+ /* Duplicate the entry id set and epsilon transitions. These
+ * are sets of integers and as such need no fixing. */
+ entryIds(other.entryIds),
+ epsilonTrans(other.epsilonTrans),
+
+ /* Copy in the elements of the conditions. */
+ stateCondList( other.stateCondList ),
+
+ /* No transitions in from other states. */
+ foreignInTrans(0),
+
+ /* This is only used during merging. Normally null. */
+ stateDictEl(0),
+ eptVect(0),
+
+ /* Fsm state data. */
+ stateBits(other.stateBits),
+
+ /* Copy in priority data. */
+ outPriorTable(other.outPriorTable),
+
+ /* Copy in action data. */
+ toStateActionTable(other.toStateActionTable),
+ fromStateActionTable(other.fromStateActionTable),
+ outActionTable(other.outActionTable),
+ outCondSet(other.outCondSet),
+ errActionTable(other.errActionTable),
+ eofActionTable(other.eofActionTable)
+{
+ /* Duplicate all the transitions. */
+ for ( TransList::Iter trans = other.outList; trans.lte(); trans++ ) {
+ /* Duplicate and store the original target in the transition. This will
+ * be corrected once all the states have been created. */
+ TransAp *newTrans = new TransAp(*trans);
+ newTrans->toState = trans->toState;
+ outList.append( newTrans );
+ }
+}
+
+/* If there is a state dict element, then delete it. Nothing else is released
+ * here; in particular the transitions on outList are not deleted by this
+ * destructor. Everything else is left up to the FsmGraph destructor. */
+StateAp::~StateAp()
+{
+ if ( stateDictEl != 0 )
+ delete stateDictEl;
+}
+
+/* Compare two states using pointers to the states. With the approximate
+ * compare the idea is that if the compare finds them the same, they can
+ * immediately be merged.
+ *
+ * Returns a negative value, zero or a positive value. The criteria are
+ * tested in this order: final state status (a final state sorts before a
+ * non-final one), epsilon transition sets, state data, and finally the out
+ * transitions compared range by range with compareFullPtr. */
+int ApproxCompare::compare( const StateAp *state1 , const StateAp *state2 )
+{
+ int compareRes;
+
+ /* Test final state status. */
+ if ( (state1->stateBits & SB_ISFINAL) && !(state2->stateBits & SB_ISFINAL) )
+ return -1;
+ else if ( !(state1->stateBits & SB_ISFINAL) && (state2->stateBits & SB_ISFINAL) )
+ return 1;
+
+ /* Test epsilon transition sets. */
+ compareRes = CmpEpsilonTrans::compare( state1->epsilonTrans,
+ state2->epsilonTrans );
+ if ( compareRes != 0 )
+ return compareRes;
+
+ /* Compare the state data. */
+ compareRes = FsmAp::compareStateData( state1, state2 );
+ if ( compareRes != 0 )
+ return compareRes;
+
+ /* Use a pair iterator to get the transition pairs. */
+ PairIter<TransAp> outPair( state1->outList.head, state2->outList.head );
+ for ( ; !outPair.end(); outPair++ ) {
+ switch ( outPair.userState ) {
+
+ /* Range present only in state1: compare against a missing transition. */
+ case RangeInS1:
+ compareRes = FsmAp::compareFullPtr( outPair.s1Tel.trans, 0 );
+ if ( compareRes != 0 )
+ return compareRes;
+ break;
+
+ /* Range present only in state2. */
+ case RangeInS2:
+ compareRes = FsmAp::compareFullPtr( 0, outPair.s2Tel.trans );
+ if ( compareRes != 0 )
+ return compareRes;
+ break;
+
+ /* Range present in both states. */
+ case RangeOverlap:
+ compareRes = FsmAp::compareFullPtr(
+ outPair.s1Tel.trans, outPair.s2Tel.trans );
+ if ( compareRes != 0 )
+ return compareRes;
+ break;
+
+ /* Nothing is compared for the break states. */
+ case BreakS1:
+ case BreakS2:
+ break;
+ }
+ }
+
+ /* Got through the entire state comparison, deem them equal. */
+ return 0;
+}
+
+/* Compare for the sort that does the initial partition of compaction.
+ *
+ * Returns a negative value, zero or a positive value. The criteria are
+ * tested in this order: final state status, epsilon transition sets, state
+ * data, the condition lists, and finally the data of the out transitions
+ * compared range by range with compareDataPtr. */
+int InitPartitionCompare::compare( const StateAp *state1 , const StateAp *state2 )
+{
+ int compareRes;
+
+ /* Test final state status. */
+ if ( (state1->stateBits & SB_ISFINAL) && !(state2->stateBits & SB_ISFINAL) )
+ return -1;
+ else if ( !(state1->stateBits & SB_ISFINAL) && (state2->stateBits & SB_ISFINAL) )
+ return 1;
+
+ /* Test epsilon transition sets. */
+ compareRes = CmpEpsilonTrans::compare( state1->epsilonTrans,
+ state2->epsilonTrans );
+ if ( compareRes != 0 )
+ return compareRes;
+
+ /* Compare the state data. */
+ compareRes = FsmAp::compareStateData( state1, state2 );
+ if ( compareRes != 0 )
+ return compareRes;
+
+ /* Use a pair iterator to test the condition pairs. */
+ PairIter<StateCond> condPair( state1->stateCondList.head, state2->stateCondList.head );
+ for ( ; !condPair.end(); condPair++ ) {
+ switch ( condPair.userState ) {
+ /* A condition range that only one of the states has makes them differ. */
+ case RangeInS1:
+ return 1;
+ case RangeInS2:
+ return -1;
+
+ /* Both have the range: order by the address of the condition space. */
+ case RangeOverlap: {
+ CondSpace *condSpace1 = condPair.s1Tel.trans->condSpace;
+ CondSpace *condSpace2 = condPair.s2Tel.trans->condSpace;
+ if ( condSpace1 < condSpace2 )
+ return -1;
+ else if ( condSpace1 > condSpace2 )
+ return 1;
+ break;
+ }
+ case BreakS1:
+ case BreakS2:
+ break;
+ }
+ }
+
+ /* Use a pair iterator to test the transition pairs. */
+ PairIter<TransAp> outPair( state1->outList.head, state2->outList.head );
+ for ( ; !outPair.end(); outPair++ ) {
+ switch ( outPair.userState ) {
+
+ /* Range present only in state1: compare against a missing transition. */
+ case RangeInS1:
+ compareRes = FsmAp::compareDataPtr( outPair.s1Tel.trans, 0 );
+ if ( compareRes != 0 )
+ return compareRes;
+ break;
+
+ /* Range present only in state2. */
+ case RangeInS2:
+ compareRes = FsmAp::compareDataPtr( 0, outPair.s2Tel.trans );
+ if ( compareRes != 0 )
+ return compareRes;
+ break;
+
+ /* Range present in both states. */
+ case RangeOverlap:
+ compareRes = FsmAp::compareDataPtr(
+ outPair.s1Tel.trans, outPair.s2Tel.trans );
+ if ( compareRes != 0 )
+ return compareRes;
+ break;
+
+ case BreakS1:
+ case BreakS2:
+ break;
+ }
+ }
+
+ /* No criterion told the states apart. */
+ return 0;
+}
+
+/* Compare for the sort that does the partitioning. Walks the out
+ * transitions of both states range by range and compares them with
+ * comparePartPtr. Returns the first non-zero result, or zero if no range
+ * tells the states apart. */
+int PartitionCompare::compare( const StateAp *state1, const StateAp *state2 )
+{
+ int compareRes;
+
+ /* Use a pair iterator to get the transition pairs. */
+ PairIter<TransAp> outPair( state1->outList.head, state2->outList.head );
+ for ( ; !outPair.end(); outPair++ ) {
+ switch ( outPair.userState ) {
+
+ /* Range present only in state1: the second argument is null. */
+ case RangeInS1:
+ compareRes = FsmAp::comparePartPtr( outPair.s1Tel.trans, 0 );
+ if ( compareRes != 0 )
+ return compareRes;
+ break;
+
+ /* Range present only in state2: the first argument is null. */
+ case RangeInS2:
+ compareRes = FsmAp::comparePartPtr( 0, outPair.s2Tel.trans );
+ if ( compareRes != 0 )
+ return compareRes;
+ break;
+
+ /* Range present in both states. */
+ case RangeOverlap:
+ compareRes = FsmAp::comparePartPtr(
+ outPair.s1Tel.trans, outPair.s2Tel.trans );
+ if ( compareRes != 0 )
+ return compareRes;
+ break;
+
+ case BreakS1:
+ case BreakS2:
+ break;
+ }
+ }
+
+ return 0;
+}
+
+/* Decide whether a pair of states should be marked. Walks the out
+ * transitions of both states range by range and asks FsmAp::shouldMarkPtr
+ * about each pair of transitions. Returns true as soon as one of them says
+ * to mark, false if none does. */
+bool MarkCompare::shouldMark( MarkIndex &markIndex, const StateAp *state1,
+ const StateAp *state2 )
+{
+ /* Use a pair iterator to get the transition pairs. */
+ PairIter<TransAp> outPair( state1->outList.head, state2->outList.head );
+ for ( ; !outPair.end(); outPair++ ) {
+ switch ( outPair.userState ) {
+
+ /* Range present only in state1: the second transition is null. */
+ case RangeInS1:
+ if ( FsmAp::shouldMarkPtr( markIndex, outPair.s1Tel.trans, 0 ) )
+ return true;
+ break;
+
+ /* Range present only in state2: the first transition is null. */
+ case RangeInS2:
+ if ( FsmAp::shouldMarkPtr( markIndex, 0, outPair.s2Tel.trans ) )
+ return true;
+ break;
+
+ /* Range present in both states. */
+ case RangeOverlap:
+ if ( FsmAp::shouldMarkPtr( markIndex,
+ outPair.s1Tel.trans, outPair.s2Tel.trans ) )
+ return true;
+ break;
+
+ case BreakS1:
+ case BreakS2:
+ break;
+ }
+ }
+
+ return false;
+}
+
+/*
+ * Transition Comparison.
+ */
+
+/* Compare the partitions of the target states of two transitions.
+ *
+ * Returns zero when trans1 is null. When trans1 is set, trans2 is expected
+ * to be set as well (the initial partitioning is relied on for that); a
+ * missing trans2 trips an assertion and, when assertions are compiled out,
+ * is answered with zero instead of dereferencing the null pointer. A
+ * transition without a target sorts before one with a target. If both
+ * have targets, the result is the ordering of the partition pointers of the
+ * two target states. */
+int FsmAp::comparePartPtr( TransAp *trans1, TransAp *trans2 )
+{
+    /* Without a first transition there is nothing to compare. */
+    if ( trans1 == 0 )
+        return 0;
+
+    /* If trans1 is set then so should trans2. The initial partitioning
+     * guarantees this for us. Do not touch trans2 if that is violated. */
+    if ( trans2 == 0 ) {
+        assert( false );
+        return 0;
+    }
+
+    if ( trans1->toState == 0 && trans2->toState != 0 )
+        return -1;
+    else if ( trans1->toState != 0 && trans2->toState == 0 )
+        return 1;
+    else if ( trans1->toState != 0 ) {
+        /* Both of the targets are set. */
+        return CmpOrd< MinPartition* >::compare(
+                trans1->toState->alg.partition, trans2->toState->alg.partition );
+    }
+
+    /* Neither transition has a target. */
+    return 0;
+}
+
+
+/* Order two transition pointers by their transition data only; the from and
+ * to states play no part. Either pointer may be null: two null pointers are
+ * equal and a null pointer sorts before a set one. */
+int FsmAp::compareDataPtr( TransAp *trans1, TransAp *trans2 )
+{
+    /* First pointer missing: equal if the second is missing too. */
+    if ( trans1 == 0 )
+        return ( trans2 != 0 ) ? -1 : 0;
+
+    /* Only the second pointer is missing. */
+    if ( trans2 == 0 )
+        return 1;
+
+    /* Both are set, the transition data decides. */
+    return compareTransData( trans1, trans2 );
+}
+
+/* Order two transitions by target state first and by transition data
+ * second. The from state plays no part. Either pointer may be null: two
+ * null pointers are equal, and here a set pointer sorts before a null
+ * one. */
+int FsmAp::compareFullPtr( TransAp *trans1, TransAp *trans2 )
+{
+    if ( trans1 == 0 || trans2 == 0 ) {
+        /* Both missing means equal. */
+        if ( trans1 == trans2 )
+            return 0;
+
+        /* Exactly one of the transitions is set. */
+        return ( trans1 != 0 ) ? -1 : 1;
+    }
+
+    /* Both are set, order by the address of the target state. */
+    if ( trans1->toState < trans2->toState )
+        return -1;
+    if ( trans1->toState > trans2->toState )
+        return 1;
+
+    /* Same target. Without a target the transition data is not looked at. */
+    if ( trans1->toState == 0 )
+        return 0;
+
+    return compareTransData( trans1, trans2 );
+}
+
+
+/* Decide from a pair of transitions whether the pair of states they leave
+ * should be marked: true if both transitions are set and the pair of their
+ * target states is already marked. Exactly one transition being set is
+ * treated as a broken invariant. */
+bool FsmAp::shouldMarkPtr( MarkIndex &markIndex, TransAp *trans1,
+        TransAp *trans2 )
+{
+    const bool have1 = ( trans1 != 0 );
+    const bool have2 = ( trans2 != 0 );
+
+    if ( have1 != have2 ) {
+        /* Exactly one of the transitions is set. The initial mark round
+         * should rule out this case. */
+        assert( false );
+        return false;
+    }
+
+    /* Neither of the transitions is set. */
+    if ( !have1 )
+        return false;
+
+    /* Both of the transitions are set. If the target pair is marked, then
+     * the pair we are considering gets marked. */
+    return markIndex.isPairMarked( trans1->toState->alg.stateNum,
+            trans2->toState->alg.stateNum );
+}
+
+
diff --git a/contrib/tools/ragel5/ragel/main.cpp b/contrib/tools/ragel5/ragel/main.cpp
new file mode 100644
index 0000000000..a22a34f1b0
--- /dev/null
+++ b/contrib/tools/ragel5/ragel/main.cpp
@@ -0,0 +1,355 @@
+/*
+ * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <iostream>
+#include <fstream>
+#ifndef _WIN32
+# include <unistd.h>
+#endif
+#include <sstream>
+
+/* Parsing. */
+#include "ragel.h"
+#include "rlscan.h"
+
+/* Parameters and output. */
+#include "pcheck.h"
+#include "vector.h"
+#include "version.h"
+#include "common.h"
+
+#ifdef _MSC_VER
+# define strncasecmp _strnicmp
+# define strcasecmp _stricmp
+#endif
+
+using std::istream;
+using std::ostream;
+using std::ifstream;
+using std::ofstream;
+using std::cin;
+using std::cout;
+using std::cerr;
+using std::endl;
+
+/* Controls minimization. Both are overridden from the command line in main:
+ * -a/-b/-j/-k select minimizeLevel and -n/-m/-l/-e select minimizeOpt. */
+MinimizeLevel minimizeLevel = MinimizePartition2;
+MinimizeOpt minimizeOpt = MinimizeMostOps;
+
+/* Graphviz dot file generation. machineSpec is taken from -S and machineName
+ * from -M; both stay null when the option is not given. */
+char *machineSpec = 0, *machineName = 0;
+bool machineSpecFound = false;
+
+/* Set to true by the -s option. */
+bool printStatistics = false;
+
+/* Print a summary of the options on stdout. The text is one concatenated
+ * string literal. The minimization level options (-a, -b, -j, -k) that main
+ * accepts are not listed here. */
+void usage()
+{
+ cout <<
+"usage: ragel [options] file\n"
+"general:\n"
+" -h, -H, -?, --help Print this usage and exit\n"
+" -v, --version Print version information and exit\n"
+" -o <file> Write output to <file>\n"
+" -s Print some statistics on stderr\n"
+"fsm minimization:\n"
+" -n Do not perform minimization\n"
+" -m Minimize at the end of the compilation\n"
+" -l Minimize after most operations (default)\n"
+" -e Minimize after every operation\n"
+"machine selection:\n"
+" -S <spec> FSM specification to output for -V\n"
+" -M <machine> Machine definition/instantiation to output for -V\n"
+"host language:\n"
+" -C The host language is C, C++, Obj-C or Obj-C++ (default)\n"
+" -D The host language is D\n"
+" -J The host language is Java\n"
+" -R The host language is Ruby\n"
+ ;
+}
+
+/* Write the program banner (name, VERSION, PUBDATE) followed by the
+ * copyright line to stdout. */
+void version()
+{
+    cout << "Ragel State Machine Compiler version " VERSION;
+    cout << " " PUBDATE << endl;
+    cout << "Copyright (c) 2001-2006 by Adrian Thurston" << endl;
+}
+
+/* Total error count. */
+int gblErrorCount = 0;
+
+/* Start a warning message: writes "file:line:col: warning: " to cerr and
+ * hands cerr back so the caller can append the message text. Does not touch
+ * the error count. */
+ostream &warning( const InputLoc &loc )
+{
+    assert( loc.fileName != 0 );
+    return cerr << loc.fileName << ":" << loc.line << ":" << loc.col << ": warning: ";
+}
+
+/* Print the opening to a program error, then return the error stream. */
+ostream &error()
+{
+ gblErrorCount += 1;
+ cerr << PROGNAME ": ";
+ return cerr;
+}
+
+ostream &error( const InputLoc &loc )
+{
+ gblErrorCount += 1;
+ assert( loc.fileName != 0 );
+ cerr << loc.fileName << ":" << loc.line << ": ";
+ return cerr;
+}
+
+/* Copy the null terminated path to out, writing every backslash as two
+ * backslashes and every other character unchanged. */
+void escapeLineDirectivePath( std::ostream &out, char *path )
+{
+    const char *cur = path;
+    while ( *cur != 0 ) {
+        const char ch = *cur++;
+        if ( ch != '\\' )
+            out << ch;
+        else
+            out << "\\\\";
+    }
+}
+
+/* Program entry point. Processes the command line, scans the input with a
+ * Scanner into an in-memory buffer and, when no errors were counted, passes
+ * the buffer to writeMachines together with the output stream. Returns (or
+ * exits with) 0 on success and 1 once an error has been reported. */
+int main(int argc, char **argv)
+{
+    ParamCheck pc("o:nmleabjkS:M:CDJRvHh?-:s", argc, argv);
+    char *inputFileName = 0;
+    char inputFileNameArr[] = "<stdin>";
+    char *outputFileName = 0;
+
+    while ( pc.check() ) {
+        switch ( pc.state ) {
+        case ParamCheck::match:
+            switch ( pc.parameter ) {
+            /* Output. */
+            case 'o':
+                if ( *pc.parameterArg == 0 )
+                    error() << "a zero length output file name was given" << endl;
+                else if ( outputFileName != 0 )
+                    error() << "more than one output file name was given" << endl;
+                else {
+                    /* Ok, remember the output file name. */
+                    outputFileName = pc.parameterArg;
+                }
+                break;
+
+            /* Minimization, mostly hidden options. */
+            case 'n':
+                minimizeOpt = MinimizeNone;
+                break;
+            case 'm':
+                minimizeOpt = MinimizeEnd;
+                break;
+            case 'l':
+                minimizeOpt = MinimizeMostOps;
+                break;
+            case 'e':
+                minimizeOpt = MinimizeEveryOp;
+                break;
+            case 'a':
+                minimizeLevel = MinimizeApprox;
+                break;
+            case 'b':
+                minimizeLevel = MinimizeStable;
+                break;
+            case 'j':
+                minimizeLevel = MinimizePartition1;
+                break;
+            case 'k':
+                minimizeLevel = MinimizePartition2;
+                break;
+
+            /* Machine spec. */
+            case 'S':
+                if ( *pc.parameterArg == 0 )
+                    error() << "please specify an argument to -S" << endl;
+                else if ( machineSpec != 0 )
+                    error() << "more than one -S argument was given" << endl;
+                else {
+                    /* Ok, remember the path to the machine to generate. */
+                    machineSpec = pc.parameterArg;
+                }
+                break;
+
+            /* Machine path. */
+            case 'M':
+                if ( *pc.parameterArg == 0 )
+                    error() << "please specify an argument to -M" << endl;
+                else if ( machineName != 0 )
+                    error() << "more than one -M argument was given" << endl;
+                else {
+                    /* Ok, remember the machine name to generate. */
+                    machineName = pc.parameterArg;
+                }
+                break;
+
+            /* Host language types. */
+            case 'C':
+                hostLangType = CCode;
+                hostLang = &hostLangC;
+                break;
+            case 'D':
+                hostLangType = DCode;
+                hostLang = &hostLangD;
+                break;
+            case 'J':
+                hostLangType = JavaCode;
+                hostLang = &hostLangJava;
+                break;
+            case 'R':
+                hostLangType = RubyCode;
+                hostLang = &hostLangRuby;
+                break;
+
+            /* Version and help. */
+            case 'v':
+                version();
+                exit(0);
+            case 'H': case 'h': case '?':
+                usage();
+                exit(0);
+            case 's':
+                printStatistics = true;
+                break;
+            case '-':
+                /* Long options arrive as the argument of the '-' parameter. */
+                if ( strcasecmp(pc.parameterArg, "help") == 0 ) {
+                    usage();
+                    exit(0);
+                }
+                else if ( strcasecmp(pc.parameterArg, "version") == 0 ) {
+                    version();
+                    exit(0);
+                }
+                else {
+                    error() << "--" << pc.parameterArg <<
+                            " is an invalid argument" << endl;
+                }
+            }
+            break;
+
+        case ParamCheck::invalid:
+            error() << "-" << pc.parameter << " is an invalid argument" << endl;
+            break;
+
+        case ParamCheck::noparam:
+            /* It is interpreted as an input file. */
+            if ( *pc.curArg == 0 )
+                error() << "a zero length input file name was given" << endl;
+            else if ( inputFileName != 0 )
+                error() << "more than one input file name was given" << endl;
+            else {
+                /* OK, Remember the filename. */
+                inputFileName = pc.curArg;
+            }
+            break;
+        }
+    }
+
+    /* Bail on above errors. */
+    if ( gblErrorCount > 0 )
+        exit(1);
+
+    /* Make sure we are not writing to the same file as the input file. The
+     * error is picked up by the next error count check. */
+    if ( inputFileName != 0 && outputFileName != 0 &&
+            strcmp( inputFileName, outputFileName ) == 0 )
+    {
+        error() << "output file \"" << outputFileName <<
+                "\" is the same as the input file" << endl;
+    }
+
+    /* Open the input file for reading, or fall back on stdin. */
+    istream *inStream;
+    if ( inputFileName != 0 ) {
+        ifstream *inFile = new ifstream( inputFileName );
+        inStream = inFile;
+        if ( ! inFile->is_open() )
+            error() << "could not open " << inputFileName << " for reading" << endl;
+    }
+    else {
+        inStream = &cin;
+    }
+
+
+    /* Bail on above errors. */
+    if ( gblErrorCount > 0 )
+        exit(1);
+
+    std::ostringstream outputBuffer;
+
+    /* The host wrapper is only written when no machine was selected. */
+    if ( machineSpec == 0 && machineName == 0 )
+        outputBuffer << "<host line=\"1\" col=\"1\">";
+
+#if defined _WIN32 || defined _WIN64
+    if (inputFileName != 0) {
+        NormalizeWinPath(inputFileName);
+    }
+#endif
+    if (inputFileName == 0) {
+        inputFileName = inputFileNameArr;
+    }
+
+    /* NOTE(review): the placeholder name "<stdin>" contains no '/', so
+     * reading from standard input is rejected by this check as well.
+     * Confirm that this is intended. */
+    if (strrchr(inputFileName, '/') == NULL) {
+        error() << "input file path should be absolute: " << inputFileName << endl;
+        exit(1);
+    }
+
+    Scanner scanner( inputFileName, *inStream, outputBuffer, 0, 0, 0, false );
+    scanner.do_scan();
+
+    /* Finished, final check for errors.. */
+    if ( gblErrorCount > 0 )
+        return 1;
+
+    /* Now send EOF to all parsers. */
+    terminateAllParsers();
+
+    /* Finished, final check for errors.. */
+    if ( gblErrorCount > 0 )
+        return 1;
+
+    if ( machineSpec == 0 && machineName == 0 )
+        outputBuffer << "</host>\n";
+
+    if ( gblErrorCount > 0 )
+        return 1;
+
+    /* Pick the output stream. A file that cannot be opened is an error:
+     * without this check the machines would be written to a failed stream
+     * and the program would still report success. */
+    ostream *outputFile = &cout;
+    ofstream *outFile = 0;
+    if ( outputFileName != 0 ) {
+        outFile = new ofstream( outputFileName );
+        if ( ! outFile->is_open() ) {
+            error() << "could not open " << outputFileName << " for writing" << endl;
+            delete outFile;
+            return 1;
+        }
+        outputFile = outFile;
+    }
+
+    /* Write the machines, then the surrounding code. */
+    writeMachines( *outputFile, outputBuffer.str(), inputFileName );
+
+    /* Closes the output file, a no-op when writing to stdout. */
+    delete outFile;
+
+    return 0;
+}
diff --git a/contrib/tools/ragel5/ragel/parsedata.cpp b/contrib/tools/ragel5/ragel/parsedata.cpp
new file mode 100644
index 0000000000..3e14cc618a
--- /dev/null
+++ b/contrib/tools/ragel5/ragel/parsedata.cpp
@@ -0,0 +1,1505 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <iostream>
+#include <iomanip>
+#include <errno.h>
+#include <stdlib.h>
+#include <limits.h>
+
+#include "ragel.h"
+#include "rlparse.h"
+#include "parsedata.h"
+#include "parsetree.h"
+#include "mergesort.h"
+#include "xmlcodegen.h"
+
+using namespace std;
+
+char mainMachine[] = "main";
+
+/* Make the token hold a newly allocated, null terminated copy of the first
+ * len bytes of str. */
+void Token::set(const char *str, int len )
+{
+    length = len;
+
+    char *copy = new char[len+1];
+    memcpy( copy, str, len );
+    copy[len] = 0;
+    data = copy;
+}
+
+/* Replace the token text with the concatenation of the current text and the
+ * text of other, stored in a newly allocated, null terminated buffer. */
+void Token::append( const Token &other )
+{
+    const int joinedLen = length + other.length;
+    char *joined = new char[joinedLen+1];
+
+    memcpy( joined, data, length );
+    memcpy( joined + length, other.data, other.length );
+    joined[joinedLen] = 0;
+
+    /* NOTE(review): the previous buffer is not released here. Whether other
+     * Token copies still point at it is not visible from this file. */
+    data = joined;
+    length = joinedLen;
+}
+
+/* Minimize fsm after an operation when the command line asks for it: always
+ * with MinimizeEveryOp, and with MinimizeMostOps only for the last operation
+ * of a sequence. The algorithm is chosen by minimizeLevel. */
+void afterOpMinimize( FsmAp *fsm, bool lastInSeq )
+{
+    const bool wanted = minimizeOpt == MinimizeEveryOp ||
+            ( minimizeOpt == MinimizeMostOps && lastInSeq );
+    if ( !wanted )
+        return;
+
+    /* First clean up the graph. FsmAp operations may leave unreachable
+     * states lying around. There should be no dead end states. The subtract
+     * intersection operators are the only places where they may be created
+     * and those operators clean them up. */
+    fsm->removeUnreachableStates();
+
+    /* Switch on the preferred minimization algorithm. */
+    switch ( minimizeLevel ) {
+        case MinimizeApprox:
+            fsm->minimizeApproximate();
+            break;
+        case MinimizePartition1:
+            fsm->minimizePartition1();
+            break;
+        case MinimizePartition2:
+            fsm->minimizePartition2();
+            break;
+        case MinimizeStable:
+            fsm->minimizeStable();
+            break;
+    }
+}
+
+/* Sum the lengths of the out lists of all states on the fsm state list. */
+int countTransitions( FsmAp *fsm )
+{
+    int total = 0;
+    for ( StateAp *st = fsm->stateList.head; st != 0; st = st->next )
+        total += st->outList.length();
+    return total;
+}
+
+/* Make an fsm key from a hexadecimal literal.
+ *
+ * str is parsed with strtoul in base 16. A value that does not fit in the
+ * alphabet type (size taken from keyOps->alphType) is reported through
+ * error(loc). For a signed alphabet type narrower than unsigned long, a value
+ * with the top bit of the alphabet type set is sign extended over the full
+ * width of unsigned long before it is converted to a Key. pd is unused. */
+Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd )
+{
+    /* Reset errno so we can check for overflow. */
+    errno = 0;
+    unsigned int size = keyOps->alphType->size;
+
+    /* True when the alphabet type leaves high bits of unsigned long unused.
+     * Only then are the shifts by size * 8 below within the type width. */
+    bool unusedBits = size < sizeof(unsigned long);
+
+    unsigned long ul = strtoul( str, 0, 16 );
+
+    if ( errno == ERANGE || ( unusedBits && ul >> (size * 8) ) ) {
+        error(loc) << "literal " << str << " overflows the alphabet type" << endl;
+
+        /* Replace the value with the first one past the alphabet type. The
+         * shift is done in unsigned long so it cannot exceed the width of
+         * int. When the alphabet type is as wide as unsigned long there is
+         * no such value; keep what strtoul returned. */
+        if ( unusedBits )
+            ul = 1UL << (size * 8);
+    }
+
+    /* Sign extend. The mask is built from unsigned long so that every bit
+     * above the alphabet type is set, whatever the width of long. */
+    if ( unusedBits && keyOps->alphType->isSigned && ul >> (size * 8 - 1) )
+        ul |= ~0UL << (size * 8);
+
+    return Key( (long)ul );
+}
+
+#ifdef _MSC_VER
+# define strtoll _strtoi64
+#endif
+
+/* Make an fsm key from a decimal literal. The value is parsed with strtoll
+ * and clamped to the minVal/maxVal of the alphabet type; clamping is
+ * reported through error(loc). pd is unused. */
+Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd )
+{
+    long long minVal = keyOps->alphType->minVal;
+    long long maxVal = keyOps->alphType->maxVal;
+
+    /* Reset errno so a range error from the conversion can be detected. */
+    errno = 0;
+    long long ll = strtoll( str, 0, 10 );
+    const bool rangeErr = errno == ERANGE;
+
+    if ( ( rangeErr && ll < 0 ) || ll < minVal ) {
+        /* Underflow. */
+        error(loc) << "literal " << str << " underflows the alphabet type" << endl;
+        ll = minVal;
+    }
+    else if ( ( rangeErr && ll > 0 ) || ll > maxVal ) {
+        /* Overflow. */
+        error(loc) << "literal " << str << " overflows the alphabet type" << endl;
+        ll = maxVal;
+    }
+
+    return keyOps->alphType->isSigned ? Key( (long)ll ) : Key( (unsigned long)ll );
+}
+
+/* Make an fsm key from an alphabet number returned by the parser. A literal
+ * starting with "0x" goes to makeFsmKeyHex, anything else to makeFsmKeyDec;
+ * both validate the value against the alphabet type. */
+Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd )
+{
+    const bool isHex = str[0] == '0' && str[1] == 'x';
+    return isHex ? makeFsmKeyHex( str, loc, pd ) : makeFsmKeyDec( str, loc, pd );
+}
+
+/* Make an fsm key from a single character. With a signed alphabet the char
+ * value is used as is, otherwise it is first converted to unsigned char. */
+Key makeFsmKeyChar( char c, ParseData *pd )
+{
+    return keyOps->isSigned ? Key( c ) : Key( (unsigned char)c );
+}
+
+/* Fill result[0..len) with the keys for the len characters of data. With a
+ * signed alphabet the chars are used as is, otherwise they are read as
+ * unsigned bytes. */
+void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd )
+{
+    if ( !keyOps->isSigned ) {
+        /* Read the input as unsigned bytes. */
+        const unsigned char *bytes = (const unsigned char*) data;
+        for ( int pos = 0; pos < len; ++pos )
+            result[pos] = Key( bytes[pos] );
+        return;
+    }
+
+    /* Read the input as plain chars. */
+    for ( int pos = 0; pos < len; ++pos )
+        result[pos] = Key( data[pos] );
+}
+
+/* Like makeFsmKeyArray except that the keys are collected in a set, so the
+ * result has only unique keys and their order is not that of the input. When
+ * caseInsensitive is set, the other-case counterpart of every lower or upper
+ * case key is inserted too. */
+void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
+        bool caseInsensitive, ParseData *pd )
+{
+    const bool isSigned = keyOps->isSigned;
+
+    for ( int si = 0; si < len; si++ ) {
+        /* Plain char for a signed alphabet, unsigned byte otherwise. */
+        Key key = isSigned ? Key( data[si] ) : Key( (unsigned char)data[si] );
+        result.insert( key );
+
+        if ( !caseInsensitive )
+            continue;
+
+        if ( key.isLower() )
+            result.insert( key.toUpper() );
+        else if ( key.isUpper() )
+            result.insert( key.toLower() );
+    }
+}
+
+/* Allocate a machine built with rangeFsm over the whole alphabet, from
+ * keyOps->minKey to keyOps->maxKey. */
+FsmAp *dotFsm( ParseData *pd )
+{
+    FsmAp *anyKey = new FsmAp();
+    anyKey->rangeFsm( keyOps->minKey, keyOps->maxKey );
+    return anyKey;
+}
+
+/* Allocate a machine built with rangeStarFsm over the whole alphabet, from
+ * keyOps->minKey to keyOps->maxKey. */
+FsmAp *dotStarFsm( ParseData *pd )
+{
+    FsmAp *anyKeyStar = new FsmAp();
+    anyKeyStar->rangeStarFsm( keyOps->minKey, keyOps->maxKey );
+    return anyKeyStar;
+}
+
+/* Make the machine for a builtin type. Only BT_Any and BT_Extend depend on
+ * the alphabet (its key range and its signedness respectively); the other
+ * builtins use fixed character ranges. Returns a newly allocated FsmAp, or
+ * null if builtin matches none of the cases below. */
+FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd )
+{
+ /* FsmAp created to return. */
+ FsmAp *retFsm = 0;
+ bool isSigned = keyOps->isSigned;
+
+ switch ( builtin ) {
+ case BT_Any: {
+ /* All characters. */
+ retFsm = dotFsm( pd );
+ break;
+ }
+ case BT_Ascii: {
+ /* Ascii characters 0 to 127. */
+ retFsm = new FsmAp();
+ retFsm->rangeFsm( 0, 127 );
+ break;
+ }
+ case BT_Extend: {
+ /* Ascii extended characters. This is the full byte range, written
+ * as -128..127 for a signed alphabet and as 0..255 otherwise. The
+ * range is always built explicitly, whatever the alphabet size. */
+ if ( isSigned ) {
+ retFsm = new FsmAp();
+ retFsm->rangeFsm( -128, 127 );
+ }
+ else {
+ retFsm = new FsmAp();
+ retFsm->rangeFsm( 0, 255 );
+ }
+ break;
+ }
+ case BT_Alpha: {
+ /* Alpha [A-Za-z]. */
+ FsmAp *upper = new FsmAp(), *lower = new FsmAp();
+ upper->rangeFsm( 'A', 'Z' );
+ lower->rangeFsm( 'a', 'z' );
+ upper->unionOp( lower );
+ upper->minimizePartition2();
+ retFsm = upper;
+ break;
+ }
+ case BT_Digit: {
+ /* Digits [0-9]. */
+ retFsm = new FsmAp();
+ retFsm->rangeFsm( '0', '9' );
+ break;
+ }
+ case BT_Alnum: {
+ /* Alpha numerics [0-9A-Za-z]. */
+ FsmAp *digit = new FsmAp(), *lower = new FsmAp();
+ FsmAp *upper = new FsmAp();
+ digit->rangeFsm( '0', '9' );
+ upper->rangeFsm( 'A', 'Z' );
+ lower->rangeFsm( 'a', 'z' );
+ digit->unionOp( upper );
+ digit->unionOp( lower );
+ digit->minimizePartition2();
+ retFsm = digit;
+ break;
+ }
+ case BT_Lower: {
+ /* Lower case characters. */
+ retFsm = new FsmAp();
+ retFsm->rangeFsm( 'a', 'z' );
+ break;
+ }
+ case BT_Upper: {
+ /* Upper case characters. */
+ retFsm = new FsmAp();
+ retFsm->rangeFsm( 'A', 'Z' );
+ break;
+ }
+ case BT_Cntrl: {
+ /* Control characters: 0 to 31 and 127. */
+ FsmAp *cntrl = new FsmAp();
+ FsmAp *highChar = new FsmAp();
+ cntrl->rangeFsm( 0, 31 );
+ highChar->concatFsm( 127 );
+ cntrl->unionOp( highChar );
+ cntrl->minimizePartition2();
+ retFsm = cntrl;
+ break;
+ }
+ case BT_Graph: {
+ /* Graphical ascii characters [!-~]. */
+ retFsm = new FsmAp();
+ retFsm->rangeFsm( '!', '~' );
+ break;
+ }
+ case BT_Print: {
+ /* Printable characters. Same as graph except includes space. */
+ retFsm = new FsmAp();
+ retFsm->rangeFsm( ' ', '~' );
+ break;
+ }
+ case BT_Punct: {
+ /* Punctuation: the four ranges [!-/], [:-@], [[-`] and [{-~]. */
+ FsmAp *range1 = new FsmAp();
+ FsmAp *range2 = new FsmAp();
+ FsmAp *range3 = new FsmAp();
+ FsmAp *range4 = new FsmAp();
+ range1->rangeFsm( '!', '/' );
+ range2->rangeFsm( ':', '@' );
+ range3->rangeFsm( '[', '`' );
+ range4->rangeFsm( '{', '~' );
+ range1->unionOp( range2 );
+ range1->unionOp( range3 );
+ range1->unionOp( range4 );
+ range1->minimizePartition2();
+ retFsm = range1;
+ break;
+ }
+ case BT_Space: {
+ /* Whitespace: [\t\v\f\n\r ]. */
+ FsmAp *cntrl = new FsmAp();
+ FsmAp *space = new FsmAp();
+ cntrl->rangeFsm( '\t', '\r' );
+ space->concatFsm( ' ' );
+ cntrl->unionOp( space );
+ cntrl->minimizePartition2();
+ retFsm = cntrl;
+ break;
+ }
+ case BT_Xdigit: {
+ /* Hex digits [0-9A-Fa-f]. */
+ FsmAp *digit = new FsmAp();
+ FsmAp *upper = new FsmAp();
+ FsmAp *lower = new FsmAp();
+ digit->rangeFsm( '0', '9' );
+ upper->rangeFsm( 'A', 'F' );
+ lower->rangeFsm( 'a', 'f' );
+ digit->unionOp( upper );
+ digit->unionOp( lower );
+ digit->minimizePartition2();
+ retFsm = digit;
+ break;
+ }
+ case BT_Lambda: {
+ /* Built with lambdaFsm. */
+ retFsm = new FsmAp();
+ retFsm->lambdaFsm();
+ break;
+ }
+ case BT_Empty: {
+ /* Built with emptyFsm. */
+ retFsm = new FsmAp();
+ retFsm->emptyFsm();
+ break;
+ }}
+
+ return retFsm;
+}
+
+/* Report whether this name inst, or any name inst reachable through
+ * childVect, has a reference count above zero. */
+bool NameInst::anyRefsRec()
+{
+    bool referenced = numRefs > 0;
+
+    /* Recurse on children, stopping at the first one that is referenced. */
+    for ( NameVect::Iter ch = childVect; !referenced && ch.lte(); ch++ )
+        referenced = (*ch)->anyRefsRec();
+
+    return referenced;
+}
+
+/*
+ * ParseData
+ */
+
+/* Initialize the structure that will collect info during the parse of a
+ * machine. fileName, sectionName and sectionLoc are stored as given; the
+ * counters and pointers listed below start from fixed values.
+ * NOTE(review): userAlphType, curNameInst, curNameChild, localNameScope and
+ * nameIndex are used by methods in this file but do not appear in this
+ * initializer list; confirm they are always assigned before being read. */
+ParseData::ParseData(const char *fileName, char *sectionName,
+ const InputLoc &sectionLoc )
+:
+ sectionGraph(0),
+ generatingSectionSubset(false),
+ nextPriorKey(0),
+ /* 0 is reserved for global error actions. */
+ nextLocalErrKey(1),
+ nextNameId(0),
+ nextCondId(0),
+ alphTypeSet(false),
+ getKeyExpr(0),
+ accessExpr(0),
+ curStateExpr(0),
+ lowerNum(0),
+ upperNum(0),
+ fileName(fileName),
+ sectionName(sectionName),
+ sectionLoc(sectionLoc),
+ errorCount(0),
+ curActionOrd(0),
+ curPriorOrd(0),
+ rootName(0),
+ exportsRootName(0),
+ nextEpsilonResolvedLink(0),
+ nextLongestMatchId(1),
+ lmRequiresErrorState(false)
+{
+ /* Initialize the dictionary of graphs. This is our symbol table. The
+ * initialization needs to be done on construction which happens at the
+ * beginning of a machine spec so any assignment operators can reference
+ * the builtins. */
+ initGraphDict();
+}
+
+/* Clean up the data collected during a parse. Only the action list is
+ * emptied here.
+ * NOTE(review): the nameIndex array and the root NameInst objects allocated
+ * with new elsewhere in this file are not released by this destructor;
+ * confirm that is intended. */
+ParseData::~ParseData()
+{
+ /* Delete all the nodes in the action list. Will cause all the
+ * string data that represents the actions to be deallocated. */
+ actionList.empty();
+}
+
+/* Create a new name inst as a child of the current name instantiation scope
+ * and give it the next name id. The child is always appended to the child
+ * vector of the scope; it is entered in the name map of the scope only when
+ * data is not null. */
+NameInst *ParseData::addNameInst( const InputLoc &loc, const char *data, bool isLabel )
+{
+    NameInst *scope = curNameInst;
+
+    /* Create the name instantiation object and insert it. */
+    NameInst *created = new NameInst( loc, scope, data, nextNameId++, isLabel );
+    scope->childVect.append( created );
+    if ( data != 0 )
+        scope->children.insertMulti( data, created );
+
+    return created;
+}
+
+/* Position the name walk on the first child of the root name. */
+void ParseData::initNameWalk()
+{
+    curNameChild = 0;
+    curNameInst = rootName;
+}
+
+/* Position the name walk on the first child of the exports root name. */
+void ParseData::initExportsNameWalk()
+{
+    curNameChild = 0;
+    curNameInst = exportsRootName;
+}
+
+/* Descend numScopes levels into the name tree, each time into the child
+ * selected by the current child number, which is reset to zero on entry into
+ * a scope. The synchronous name tree and parse tree walk relies on this. If
+ * isLocal is set, the scope reached becomes the local name scope. The
+ * returned frame holds the previous position and is meant to be handed to
+ * the matching popNameScope (or resetNameScope) call. */
+NameFrame ParseData::enterNameScope( bool isLocal, int numScopes )
+{
+    /* Save off the current data. */
+    NameFrame saved;
+    saved.prevLocalScope = localNameScope;
+    saved.prevNameChild = curNameChild;
+    saved.prevNameInst = curNameInst;
+
+    /* Enter into the new name scope. */
+    int remaining = numScopes;
+    while ( remaining-- > 0 ) {
+        curNameInst = curNameInst->childVect[curNameChild];
+        curNameChild = 0;
+    }
+
+    if ( isLocal )
+        localNameScope = curNameInst;
+
+    return saved;
+}
+
+/* Return from a child scope to the parent recorded in frame, which is
+ * obtained from the corresponding call to enterNameScope. The child number
+ * is advanced past the scope that was just left. */
+void ParseData::popNameScope( const NameFrame &frame )
+{
+    localNameScope = frame.prevLocalScope;
+    curNameChild = frame.prevNameChild + 1;
+    curNameInst = frame.prevNameInst;
+}
+
+/* Restore the position recorded in frame exactly. Unlike popNameScope the
+ * child number is not advanced. */
+void ParseData::resetNameScope( const NameFrame &frame )
+{
+    localNameScope = frame.prevLocalScope;
+    curNameChild = frame.prevNameChild;
+    curNameInst = frame.prevNameInst;
+}
+
+
+/* Count one more use for every name referenced from the current name scope
+ * and unset the graph entry point of each name whose uses have caught up
+ * with its references. */
+void ParseData::unsetObsoleteEntries( FsmAp *graph )
+{
+    for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) {
+        NameInst *name = *ref;
+        name->numUses += 1;
+
+        /* Still needed by a later reference, leave the entry in place. */
+        if ( name->numUses != name->numRefs )
+            continue;
+
+        /* No longer needed. The entry must exist before and be gone after. */
+        assert( graph->entryPoints.find( name->id ) != 0 );
+        graph->unsetEntry( name->id );
+        assert( graph->entryPoints.find( name->id ) == 0 );
+    }
+}
+
+/* Breadth-first search of the name tree below refFrom for name insts that
+ * are registered under data in the name map of their parent. All matches are
+ * returned; the set is empty if there are none. With recLabelsOnly the
+ * search descends through label children only. */
+NameSet ParseData::resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly )
+{
+    NameSet found;
+
+    /* Queue needed for breadth-first search, load it with the start node. */
+    NameInstList pending;
+    pending.append( refFrom );
+
+    while ( pending.length() > 0 ) {
+        /* Pull the next from location off the queue. */
+        NameInst *from = pending.detachFirst();
+
+        /* Record all instances of the name found at this level. */
+        NameMapEl *low, *high;
+        if ( from->children.findMulti( data, low, high ) ) {
+            for ( NameMapEl *el = low; el <= high; el++ )
+                found.insert( el->value );
+        }
+
+        /* Whether or not the name was found here, queue the children so
+         * that the levels below are searched too. */
+        for ( NameVect::Iter child = from->childVect; child.lte(); child++ ) {
+            if ( recLabelsOnly && !(*child)->isLabel )
+                continue;
+            pending.append( *child );
+        }
+    }
+
+    return found;
+}
+
+/* Resolve the components of nameRef from position namePos on, starting the
+ * search at refFrom. Every name inst matching the whole remaining path is
+ * added to result. */
+void ParseData::resolveFrom( NameSet &result, NameInst *refFrom,
+        const NameRef &nameRef, int namePos )
+{
+    /* Look for the current component below refFrom. */
+    NameSet partResult = resolvePart( refFrom, nameRef[namePos], false );
+
+    const int nextPos = namePos + 1;
+    if ( nextPos >= nameRef.length() ) {
+        /* This is the last component, the part results are final. */
+        result.insert( partResult );
+        return;
+    }
+
+    /* There are more components to the name, search using all the part
+     * results as the base. */
+    for ( NameSet::Iter name = partResult; name.lte(); name++ )
+        resolveFrom( result, *name, nameRef, nextPos );
+}
+
+/* Write out a name reference with its components separated by "::". A null
+ * first component is written as a leading "::". */
+ostream &operator<<( ostream &out, const NameRef &nameRef )
+{
+    int pos = 0;
+    if ( nameRef[0] == 0 ) {
+        out << "::";
+        pos = 1;
+    }
+
+    /* First named component, then the rest with separators. */
+    out << nameRef[pos];
+    for ( pos += 1; pos < nameRef.length(); pos++ )
+        out << "::" << nameRef[pos];
+
+    return out;
+}
+
+/* Write out the qualified name of a name inst: the names of its ancestors,
+ * outermost first and without the root, then its own name, each preceded by
+ * "::". A null name is written as "<ANON>". */
+ostream &operator<<( ostream &out, const NameInst &nameInst )
+{
+    /* Count the ancestors. */
+    int depth = 0;
+    for ( NameInst *up = nameInst.parent; up != 0; up = up->parent )
+        depth += 1;
+
+    /* Collect them, outermost ancestor in slot zero. */
+    NameInst **chain = new NameInst*[depth];
+    NameInst *up = nameInst.parent;
+    for ( int slot = depth; slot-- > 0; ) {
+        chain[slot] = up;
+        up = up->parent;
+    }
+
+    /* Write the ancestors out, skip the root. */
+    for ( int p = 1; p < depth; p++ )
+        out << "::" << ( chain[p]->name != 0 ? chain[p]->name : "<ANON>" );
+
+    /* Write the name and cleanup. */
+    out << "::" << ( nameInst.name != 0 ? nameInst.name : "<ANON>" );
+    delete[] chain;
+    return out;
+}
+
+/* Orders name insts by source location: line first, column second. */
+struct CmpNameInstLoc
+{
+    static int compare( const NameInst *ni1, const NameInst *ni2 )
+    {
+        if ( ni1->loc.line != ni2->loc.line )
+            return ni1->loc.line < ni2->loc.line ? -1 : 1;
+        if ( ni1->loc.col != ni2->loc.col )
+            return ni1->loc.col < ni2->loc.col ? -1 : 1;
+        return 0;
+    }
+};
+
+void errorStateLabels( const NameSet &resolved )
+{
+ MergeSort<NameInst*, CmpNameInstLoc> mergeSort;
+ mergeSort.sort( resolved.data, resolved.length() );
+ for ( NameSet::Iter res = resolved; res.lte(); res++ )
+ error((*res)->loc) << " -> " << **res << endl;
+}
+
+
+/* Resolve a state reference to a name inst. Unless the reference is rooted
+ * (first component null), the scopes referencing the action are searched
+ * first; if that finds nothing the search starts from the root name. An
+ * ambiguous reference is reported and the first match is used. An
+ * unresolvable reference is reported and null is returned. */
+NameInst *ParseData::resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action )
+{
+    NameInst *nameInst = 0;
+    const bool rootOnly = nameRef[0] == 0;
+
+    /* Do the local search if the name is not strictly a root level name
+     * search and the action is referenced from somewhere. */
+    if ( !rootOnly && action != 0 && action->actionRefs.length() > 0 ) {
+        /* Look for the name in all referencing scopes. */
+        NameSet resolved;
+        for ( ActionRefs::Iter actRef = action->actionRefs; actRef.lte(); actRef++ )
+            resolveFrom( resolved, *actRef, nameRef, 0 );
+
+        if ( resolved.length() > 0 ) {
+            /* Take the first one. */
+            nameInst = resolved[0];
+            if ( resolved.length() > 1 ) {
+                /* Complain about the multiple references. */
+                error(loc) << "state reference " << nameRef <<
+                        " resolves to multiple entry points" << endl;
+                errorStateLabels( resolved );
+            }
+        }
+    }
+
+    /* If not found in the local scope, look in global. A rooted reference
+     * skips its leading null component. */
+    if ( nameInst == 0 ) {
+        NameSet resolved;
+        resolveFrom( resolved, rootName, nameRef, rootOnly ? 1 : 0 );
+
+        if ( resolved.length() > 0 ) {
+            /* Take the first. */
+            nameInst = resolved[0];
+            if ( resolved.length() > 1 ) {
+                /* Complain about the multiple references. */
+                error(loc) << "state reference " << nameRef <<
+                        " resolves to multiple entry points" << endl;
+                errorStateLabels( resolved );
+            }
+        }
+    }
+
+    /* If not found then complain. */
+    if ( nameInst == 0 )
+        error(loc) << "could not resolve state reference " << nameRef << endl;
+
+    return nameInst;
+}
+
+/* Resolve the state references made by the Entry, Goto, Call and Next items
+ * of an inline list, recursing into the children of every item. The resolved
+ * name inst is stored in the item (null if it could not be resolved) and its
+ * reference count is raised. action is passed on for the local search. */
+void ParseData::resolveNameRefs( InlineList *inlineList, Action *action )
+{
+    for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
+        switch ( item->type ) {
+            case InlineItem::Entry: case InlineItem::Goto:
+            case InlineItem::Call: case InlineItem::Next: {
+                /* Resolve, pass action for local search. */
+                NameInst *target = resolveStateRef( *item->nameRef, item->loc, action );
+
+                /* A failed lookup has already been reported by
+                 * resolveStateRef and yields null; the target must not be
+                 * dereferenced in that case. */
+                if ( target != 0 ) {
+                    /* Check if the target goes into a longest match. */
+                    NameInst *search = target->parent;
+                    while ( search != 0 ) {
+                        if ( search->isLongestMatch ) {
+                            error(item->loc) << "cannot enter inside a longest "
+                                    "match construction as an entry point" << endl;
+                            break;
+                        }
+                        search = search->parent;
+                    }
+
+                    /* Note the reference in the name. This will cause the
+                     * entry point to survive to the end of the graph
+                     * generating walk. */
+                    target->numRefs += 1;
+                }
+
+                item->nameTarg = target;
+                break;
+            }
+            default:
+                break;
+        }
+
+        /* Some of the item types may have children. */
+        if ( item->children != 0 )
+            resolveNameRefs( item->children, action );
+    }
+}
+
+/* Resolve the label references made inside every action that is itself
+ * referenced. Unreferenced actions are left alone. */
+void ParseData::resolveActionNameRefs()
+{
+    for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
+        if ( act->actionRefs.length() <= 0 )
+            continue;
+        resolveNameRefs( act->inlineList, act );
+    }
+}
+
+/* Enter from, and every name inst below it, in the name index under its id.
+ * The walk covers the implicit final state of a name as well as all of its
+ * children. */
+void ParseData::fillNameIndex( NameInst *from )
+{
+    nameIndex[from->id] = from;
+
+    /* The implicit final state first, if there is one. */
+    if ( from->final != 0 )
+        fillNameIndex( from->final );
+
+    /* Then all children. */
+    NameVect::Iter child = from->childVect;
+    while ( child.lte() ) {
+        fillNameIndex( *child );
+        child++;
+    }
+}
+
+/* Create the two parentless, unnamed root name insts: rootName for the
+ * regular name tree and exportsRootName for the exports walk. Each one
+ * consumes a name id, rootName first. */
+void ParseData::makeRootNames()
+{
+ /* Create the root names. */
+ rootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
+ exportsRootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
+}
+
+/* Build the name tree and the name index. With a dictionary element only
+ * that definition is walked, otherwise every element of the instance list.
+ * Afterwards the index is allocated with one slot per name id handed out and
+ * filled from both root names. */
+void ParseData::makeNameTree( GraphDictEl *dictEl )
+{
+    /* Set up curNameInst for the walk. */
+    initNameWalk();
+
+    if ( dictEl == 0 ) {
+        /* No start location, recurse on every instance. */
+        for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ )
+            glel->value->makeNameTree( glel->loc, this );
+    }
+    else {
+        /* A start location has been specified. */
+        dictEl->value->makeNameTree( dictEl->loc, this );
+    }
+
+    /* The number of nodes in the tree can now be given by nextNameId. */
+    const int numNames = nextNameId;
+    nameIndex = new NameInst*[numNames];
+    memset( nameIndex, 0, sizeof(NameInst*)*numNames );
+
+    fillNameIndex( rootName );
+    fillNameIndex( exportsRootName );
+}
+
+
+/* Enter a builtin machine in the graph dictionary under name. The builtin is
+ * wrapped in the Expression, Join, JoinOrLm and VarDef layers, innermost
+ * first. */
+void ParseData::createBuiltin(const char *name, BuiltinMachine builtin )
+{
+    VarDef *varDef = new VarDef( name,
+            new JoinOrLm( new Join( new Expression( builtin ) ) ) );
+    graphDict.insert( new GraphDictEl( name, varDef ) );
+}
+
+/* Initialize the graph dict with the builtin types. "null" and "zlen" are
+ * two names for the same builtin. */
+void ParseData::initGraphDict( )
+{
+    static const struct {
+        const char *name;
+        BuiltinMachine machine;
+    } builtins[] = {
+        { "any", BT_Any },
+        { "ascii", BT_Ascii },
+        { "extend", BT_Extend },
+        { "alpha", BT_Alpha },
+        { "digit", BT_Digit },
+        { "alnum", BT_Alnum },
+        { "lower", BT_Lower },
+        { "upper", BT_Upper },
+        { "cntrl", BT_Cntrl },
+        { "graph", BT_Graph },
+        { "print", BT_Print },
+        { "punct", BT_Punct },
+        { "space", BT_Space },
+        { "xdigit", BT_Xdigit },
+        { "null", BT_Lambda },
+        { "zlen", BT_Lambda },
+        { "empty", BT_Empty }
+    };
+
+    /* Register in table order. */
+    const unsigned int count = sizeof(builtins) / sizeof(builtins[0]);
+    for ( unsigned int i = 0; i < count; i++ )
+        createBuiltin( builtins[i].name, builtins[i].machine );
+}
+
+/* Select the alphabet type from a two-word type name (s1 s2). Scans the host
+ * language's types for an entry whose data1 equals s1 and whose non-null data2
+ * equals s2. Returns true and records the entry in userAlphType on a match,
+ * false otherwise. alphTypeSet is raised in both cases. */
+bool ParseData::setAlphType( char *s1, char *s2 )
+{
+	/* The request is recorded even when the lookup below fails. */
+	alphTypeSet = true;
+
+	for ( int t = 0; t < hostLang->numHostTypes; t++ ) {
+		HostType *candidate = hostLang->hostTypes + t;
+		if ( strcmp( s1, candidate->data1 ) != 0 )
+			continue;
+		/* Single-word types cannot match a two-word request. */
+		if ( candidate->data2 == 0 )
+			continue;
+		if ( strcmp( s2, candidate->data2 ) != 0 )
+			continue;
+
+		userAlphType = candidate;
+		return true;
+	}
+
+	return false;
+}
+
+/* Select the alphabet type from a one-word type name. Scans the host
+ * language's types for an entry whose data1 equals s1 and whose data2 is null.
+ * Returns true and records the entry in userAlphType on a match, false
+ * otherwise. alphTypeSet is raised in both cases. */
+bool ParseData::setAlphType( char *s1 )
+{
+	/* The request is recorded even when the lookup below fails. */
+	alphTypeSet = true;
+
+	for ( int t = 0; t < hostLang->numHostTypes; t++ ) {
+		HostType *candidate = hostLang->hostTypes + t;
+		if ( strcmp( s1, candidate->data1 ) != 0 )
+			continue;
+		/* Two-word types cannot match a one-word request. */
+		if ( candidate->data2 != 0 )
+			continue;
+
+		userAlphType = candidate;
+		return true;
+	}
+
+	return false;
+}
+
+/* Set up this section's key operators (thisKeyOps) and the first condition
+ * key (thisCondData.nextCondKey). */
+void ParseData::initKeyOps( )
+{
+	/* Signedness and bounds come from the alphabet type: the user's choice
+	 * if one was set, the host language default otherwise. */
+	HostType *alphType;
+	if ( alphTypeSet )
+		alphType = userAlphType;
+	else
+		alphType = hostLang->defaultAlphType;
+	thisKeyOps.setAlphType( alphType );
+
+	/* An explicit range, when given, overrides the bounds. Only lowerNum
+	 * is tested; upperNum is used alongside it. */
+	const bool rangeGiven = ( lowerNum != 0 );
+	if ( rangeGiven ) {
+		thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this );
+		thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this );
+	}
+
+	/* Condition keys start one past the largest alphabet key. */
+	thisCondData.nextCondKey = thisKeyOps.maxKey;
+	thisCondData.nextCondKey.increment();
+}
+
+void ParseData::printNameInst( NameInst *nameInst, int level )
+{
+	/* One indent unit per nesting level. */
+	for ( int remaining = level; remaining > 0; remaining-- )
+		cerr << " ";
+
+	/* Anonymous instances have no name pointer. */
+	const char *label = nameInst->name;
+	if ( label == 0 )
+		label = "<ANON>";
+
+	cerr << label <<
+		" id: " << nameInst->id <<
+		" refs: " << nameInst->numRefs <<
+		" uses: " << nameInst->numUses << endl;
+
+	/* Children are printed in order of appearance, one level deeper. */
+	NameVect::Iter child = nameInst->childVect;
+	while ( child.lte() ) {
+		printNameInst( *child, level+1 );
+		child++;
+	}
+}
+
+/* Drop repeated actions from an action table, keeping the first occurrence
+ * of each action value and preserving the order of the survivors. */
+void ParseData::removeDups( ActionTable &table )
+{
+	for ( int keep = 0; keep < table.length(); keep++ ) {
+		/* Scan everything after the kept entry. The probe index only
+		 * advances when nothing was removed, because a removal shifts the
+		 * next candidate into the current slot. */
+		int probe = keep + 1;
+		while ( probe < table.length() ) {
+			if ( table[probe].value != table[keep].value )
+				probe += 1;
+			else
+				table.vremove( probe );
+		}
+	}
+}
+
+/* Remove duplicate actions from every action table in the graph: each
+ * transition's table plus the to-state, from-state and eof tables of each
+ * state. Intended to run once all actions have been transferred to their
+ * final resting place. */
+void ParseData::removeActionDups( FsmAp *graph )
+{
+	StateList::Iter st = graph->stateList;
+	while ( st.lte() ) {
+		/* Transition action tables first. */
+		TransList::Iter tr = st->outList;
+		while ( tr.lte() ) {
+			removeDups( tr->actionTable );
+			tr++;
+		}
+
+		/* Then the per-state tables. */
+		removeDups( st->toStateActionTable );
+		removeDups( st->fromStateActionTable );
+		removeDups( st->eofActionTable );
+		st++;
+	}
+}
+
+Action *ParseData::newAction(const char *name, InlineList *inlineList )
+{
+	/* Actions made here do not come from the input text, so they carry a
+	 * placeholder location. */
+	InputLoc placeholder;
+	placeholder.fileName = "<NONE>";
+	placeholder.line = 1;
+	placeholder.col = 1;
+
+	/* The action takes the next condition id, is referenced from the root
+	 * name, and is appended to the section's action list. */
+	Action *created = new Action( placeholder, name, inlineList, nextCondId++ );
+	created->actionRefs.append( rootName );
+	actionList.append( created );
+	return created;
+}
+
+void ParseData::initLongestMatchData()
+{
+	/* Nothing to prepare when the section has no longest-match items. */
+	if ( lmList.length() <= 0 )
+		return;
+
+	/* Each helper action wraps a single inline item and is flagged as a
+	 * longest-match action. They are created in a fixed order because
+	 * newAction() hands out ids sequentially. */
+
+	/* initTokStart: resets the token start. */
+	InlineList *initTokStartCode = new InlineList;
+	initTokStartCode->append( new InlineItem( InputLoc(), InlineItem::LmInitTokStart ) );
+	initTokStart = newAction( "initts", initTokStartCode );
+	initTokStart->isLmAction = true;
+
+	/* initActId: gives act a default value. */
+	InlineList *initActCode = new InlineList;
+	initActCode->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) );
+	initActId = newAction( "initact", initActCode );
+	initActId->isLmAction = true;
+
+	/* setTokStart: sets tokstart. */
+	InlineList *setTokStartCode = new InlineList;
+	setTokStartCode->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) );
+	setTokStart = newAction( "tokstart", setTokStartCode );
+	setTokStart->isLmAction = true;
+
+	/* setTokEnd: sets tokend. */
+	InlineList *setTokEndCode = new InlineList;
+	setTokEndCode->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) );
+	setTokEnd = newAction( "tokend", setTokEndCode );
+	setTokEnd->isLmAction = true;
+
+	/* Reserve ordering numbers for the four actions so they sort ahead of
+	 * all user action embeddings. */
+	initTokStartOrd = curActionOrd++;
+	initActIdOrd = curActionOrd++;
+	setTokStartOrd = curActionOrd++;
+	setTokEndOrd = curActionOrd++;
+}
+
+/* After building the graph, do some extra processing to ensure the runtime
+ * data of the longest match operators is consistent: every state that can be
+ * entered by a jump or returned to after a call gets the initTokStart
+ * to-state action. */
+void ParseData::setLongestMatchData( FsmAp *graph )
+{
+	/* Only relevant when the section contains longest-match items. */
+	if ( lmList.length() <= 0 )
+		return;
+
+	/* All entry points (targets of fgoto, fcall, fnext, fentry) must init
+	 * the tokstart. This runs after duplicates were removed, so guard
+	 * against inserting the action twice. */
+	for ( EntryMap::Iter entry = graph->entryPoints; entry.lte(); entry++ ) {
+		ActionTable &toStateActions = entry->value->toStateActionTable;
+		if ( toStateActions.hasAction( initTokStart ) )
+			continue;
+		toStateActions.setAction( initTokStartOrd, initTokStart );
+	}
+
+	/* Collect the targets of transitions whose actions contain calls.
+	 * These states will be targeted by fret statements. */
+	StateSet returnTargets;
+	for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+		for ( TransList::Iter tr = st->outList; tr.lte(); tr++ ) {
+			/* A transition without a target contributes nothing. */
+			if ( tr->toState == 0 )
+				continue;
+
+			for ( ActionTable::Iter act = tr->actionTable; act.lte(); act++ ) {
+				if ( act->value->anyCall )
+					returnTargets.insert( tr->toState );
+			}
+		}
+	}
+
+	/* Init tokstart upon entering the collected states, again without
+	 * creating duplicates. */
+	for ( StateSet::Iter target = returnTargets; target.lte(); target++ ) {
+		ActionTable &toStateActions = (*target)->toStateActionTable;
+		if ( toStateActions.hasAction( initTokStart ) )
+			continue;
+		toStateActions.setAction( initTokStartOrd, initTokStart );
+	}
+}
+
+/* Make the graph from a graph dict node. The steps run in a fixed order:
+ * walk the parse tree, make entry points deterministic, transfer global error
+ * actions, remove duplicate actions, remove unreachable states, clear action
+ * ordering keys and priorities, optionally minimize, and finally compress
+ * transitions. Returns the resulting graph. */
+FsmAp *ParseData::makeInstance( GraphDictEl *gdNode )
+{
+ /* Build the graph from a walk of the parse tree. */
+ FsmAp *graph = gdNode->value->walk( this );
+
+ /* Resolve any labels that point to multiple states. Any labels that are
+ * still around are referenced only by gotos and calls and they need to be
+ * made into deterministic entry points. */
+ graph->deterministicEntry();
+
+ /*
+ * All state construction is now complete.
+ */
+
+ /* Transfer global error actions. */
+ for ( StateList::Iter state = graph->stateList; state.lte(); state++ )
+ graph->transferErrorActions( state, 0 );
+
+ /* Duplicates are removed after the error actions have been transferred,
+ * so the tables touched by the transfer are de-duplicated too. */
+ removeActionDups( graph );
+
+ /* Remove unreachable states. There should be no dead end states. The
+ * subtract and intersection operators are the only places where they may
+ * be created and those operators clean them up. */
+ graph->removeUnreachableStates();
+
+ /* No more fsm operations are to be done. Action ordering numbers are
+ * no longer of use and will just hinder minimization. Clear them. */
+ graph->nullActionKeys();
+
+ /* Transition priorities are no longer of use. We can clear them
+ * because they will just hinder minimization as well. Clear them. */
+ graph->clearAllPriorities();
+
+ /* minimizeOpt decides whether to minimize at all; minimizeLevel picks the
+ * algorithm. A level not listed in the switch leaves the graph as is. */
+ if ( minimizeOpt != MinimizeNone ) {
+ /* Minimize here even if we minimized at every op. Now that function
+ * keys have been cleared we may get a more minimal fsm. */
+ switch ( minimizeLevel ) {
+ case MinimizeApprox:
+ graph->minimizeApproximate();
+ break;
+ case MinimizeStable:
+ graph->minimizeStable();
+ break;
+ case MinimizePartition1:
+ graph->minimizePartition1();
+ break;
+ case MinimizePartition2:
+ graph->minimizePartition2();
+ break;
+ }
+ }
+
+ /* Runs regardless of whether minimization was requested. */
+ graph->compressTransitions();
+
+ return graph;
+}
+
+void ParseData::printNameTree()
+{
+	/* Dump every top-level name instance, with its subtree, at depth 0. */
+	NameVect::Iter top = rootName->childVect;
+	while ( top.lte() ) {
+		printNameInst( *top, 0 );
+		top++;
+	}
+
+	/* Then list the name index by id to show that it is correct. */
+	cerr << "name index:" << endl;
+	for ( int id = 0; id < nextNameId; id++ ) {
+		cerr << id << ": ";
+		const char *label = nameIndex[id]->name;
+		cerr << ( label == 0 ? "<ANON>" : label ) << endl;
+	}
+}
+
+FsmAp *ParseData::makeSpecific( GraphDictEl *gdNode )
+{
+	/* Name tree and supporting data structures, for this definition only. */
+	makeNameTree( gdNode );
+
+	/* Resolve name references reachable from gdNode. */
+	initNameWalk();
+	gdNode->value->resolveNameRefs( this );
+
+	/* Action references and extern entry point references are deliberately
+	 * left unresolved. Only part of the graph is being built, so many name
+	 * references would fail. That is acceptable because generating a
+	 * subset is usually only done when inspecting the compiled machine. */
+
+	/* Let the XML code generator know that name references in actions were
+	 * not looked up, so it can avoid segfaulting on them. */
+	generatingSectionSubset = true;
+
+	/* Build just the requested graph, with a fresh name walk. */
+	initNameWalk();
+	return makeInstance( gdNode );
+}
+
+/* Build every instantiated machine of the section and merge them into one
+ * graph.
+ *
+ * The instance whose key equals mainMachine becomes the main graph. If there
+ * is no such instance, the last instance built takes that role. All remaining
+ * graphs are added into the main graph with globOp().
+ *
+ * Returns the combined graph. If the section has no instantiations at all, an
+ * error is reported and 0 is returned. */
+FsmAp *ParseData::makeAll()
+{
+	/* Build the name tree and supporting data structures. */
+	makeNameTree( 0 );
+
+	/* Resolve name references in the tree. */
+	initNameWalk();
+	for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ )
+		glel->value->resolveNameRefs( this );
+
+	/* Resolve action code name references. */
+	resolveActionNameRefs();
+
+	/* Force name references to the top level instantiations. */
+	for ( NameVect::Iter inst = rootName->childVect; inst.lte(); inst++ )
+		(*inst)->numRefs += 1;
+
+	/* Without any instantiation there is no graph to fall back on. The
+	 * fallback below would otherwise read graphs[-1]. This is checked only
+	 * after the name tree work above so that state stays as before for
+	 * later steps such as the exports pass.
+	 * NOTE(review): relies on error() bumping gblErrorCount so that
+	 * prepareMachineGen() returns before using the null graph - confirm. */
+	if ( instanceList.length() == 0 ) {
+		error() << "no machine instantiations found in section "
+			<< sectionName << endl;
+		return 0;
+	}
+
+	FsmAp *mainGraph = 0;
+	FsmAp **graphs = new FsmAp*[instanceList.length()];
+	int numOthers = 0;
+
+	/* Make all the instantiations. The main machine, if present, is kept
+	 * apart; everything else is collected in the graphs array. */
+	initNameWalk();
+	for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
+		if ( strcmp( glel->key, mainMachine ) == 0 ) {
+			/* Main graph is always instantiated. */
+			mainGraph = makeInstance( glel );
+		}
+		else {
+			/* Instantiate and store in others array. */
+			graphs[numOthers++] = makeInstance( glel );
+		}
+	}
+
+	/* No main machine: promote the last instance built. The list is known
+	 * to be non-empty here, so numOthers is at least one. */
+	if ( mainGraph == 0 )
+		mainGraph = graphs[--numOthers];
+
+	if ( numOthers > 0 ) {
+		/* Add all the other graphs into main. */
+		mainGraph->globOp( graphs, numOthers );
+	}
+
+	delete[] graphs;
+	return mainGraph;
+}
+
+/* Walk an inline list and set action->anyCall if a Call or CallExpr item is
+ * found anywhere in it. The search descends into longest-match pattern
+ * actions and into child lists. */
+void ParseData::analyzeAction( Action *action, InlineList *inlineList )
+{
+	/* FIXME: Actions used as conditions should be very constrained. */
+	for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
+		switch ( item->type ) {
+			case InlineItem::Call:
+			case InlineItem::CallExpr:
+				action->anyCall = true;
+				break;
+
+			case InlineItem::LmSwitch: {
+				/* Need to recurse into every part of the longest match. */
+				LongestMatch *lm = item->longestMatch;
+				for ( LmPartList::Iter part = *lm->longestMatchList; part.lte(); part++ ) {
+					if ( part->action != 0 )
+						analyzeAction( action, part->action->inlineList );
+				}
+				break;
+			}
+
+			case InlineItem::LmOnLast:
+			case InlineItem::LmOnNext:
+			case InlineItem::LmOnLagBehind: {
+				/* These items reference a single longest match part. */
+				LongestMatchPart *part = item->longestMatchPart;
+				if ( part->action != 0 )
+					analyzeAction( action, part->action->inlineList );
+				break;
+			}
+
+			default:
+				break;
+		}
+
+		/* Nested inline code is analyzed whatever the item type. */
+		if ( item->children != 0 )
+			analyzeAction( action, item->children );
+	}
+}
+
+
+/* Check actions for bad uses of fsm directives. For an action referenced
+ * from an eof action table, every directive that needs a current element or
+ * changes the current state is reported. Child lists are checked
+ * recursively. We don't go inside longest match items in actions created by
+ * ragel, since we just want the user actions. */
+void ParseData::checkInlineList( Action *act, InlineList *inlineList )
+{
+	for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
+		/* EOF checks: pick the complaint, if any, that applies to this item. */
+		const char *complaint = 0;
+		if ( act->numEofRefs > 0 ) {
+			switch ( item->type ) {
+				case InlineItem::PChar:
+					complaint = "pointer to current element does not exist in "
+						"EOF action code";
+					break;
+				case InlineItem::Char:
+					complaint = "current element does not exist in "
+						"EOF action code";
+					break;
+				case InlineItem::Hold:
+				case InlineItem::Exec:
+					complaint = "changing the current element not possible in "
+						"EOF action code";
+					break;
+				case InlineItem::Goto: case InlineItem::Call:
+				case InlineItem::Next: case InlineItem::GotoExpr:
+				case InlineItem::CallExpr: case InlineItem::NextExpr:
+				case InlineItem::Ret:
+					complaint = "changing the current state not possible in "
+						"EOF action code";
+					break;
+				default:
+					break;
+			}
+		}
+
+		if ( complaint != 0 )
+			error(item->loc) << complaint << endl;
+
+		/* Recurse. */
+		if ( item->children != 0 )
+			checkInlineList( act, item->children );
+	}
+}
+
+void ParseData::checkAction( Action *action )
+{
+	/* A referenced, non-longest-match action that contains a call must not
+	 * be embedded within a longest match machine. */
+	const bool userActionWithCall =
+			!action->isLmAction && action->numRefs() > 0 && action->anyCall;
+
+	if ( userActionWithCall ) {
+		for ( ActionRefs::Iter ref = action->actionRefs; ref.lte(); ref++ ) {
+			/* Climb from the referencing name towards the root. The first
+			 * longest-match ancestor triggers one error for this reference. */
+			for ( NameInst *scope = *ref; scope != 0; scope = scope->parent ) {
+				if ( !scope->isLongestMatch )
+					continue;
+
+				error(action->loc) << "within a scanner, fcall is permitted"
+					" only in pattern actions" << endl;
+				break;
+			}
+		}
+	}
+
+	/* Directive usage inside the action's code is checked in all cases. */
+	checkInlineList( action, action->inlineList );
+}
+
+
+/* Analyze the finished graph in three passes: flag actions containing
+ * calls, count how often each action is referenced from each kind of table
+ * in the graph, then check every action for bad directive usage. */
+void ParseData::analyzeGraph( FsmAp *graph )
+{
+	/* Pass 1: detect calls inside each action. */
+	ActionList::Iter scanned = actionList;
+	while ( scanned.lte() ) {
+		analyzeAction( scanned, scanned->inlineList );
+		scanned++;
+	}
+
+	/* Pass 2: reference counts, per state. */
+	for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
+		/* Actions on outgoing transitions. */
+		for ( TransList::Iter tr = state->outList; tr.lte(); tr++ ) {
+			for ( ActionTable::Iter ref = tr->actionTable; ref.lte(); ref++ )
+				ref->value->numTransRefs += 1;
+		}
+
+		/* To-state, from-state and eof actions. */
+		for ( ActionTable::Iter ref = state->toStateActionTable; ref.lte(); ref++ )
+			ref->value->numToStateRefs += 1;
+
+		for ( ActionTable::Iter ref = state->fromStateActionTable; ref.lte(); ref++ )
+			ref->value->numFromStateRefs += 1;
+
+		for ( ActionTable::Iter ref = state->eofActionTable; ref.lte(); ref++ )
+			ref->value->numEofRefs += 1;
+
+		/* Actions used as conditions in this state's condition spaces. */
+		for ( StateCondList::Iter cond = state->stateCondList; cond.lte(); cond++ ) {
+			for ( CondSet::Iter member = cond->condSpace->condSet; member.lte(); member++ )
+				(*member)->numCondRefs += 1;
+		}
+	}
+
+	/* Pass 3: checks for bad usage of directives in action code. These rely
+	 * on the counts gathered above. */
+	ActionList::Iter checked = actionList;
+	while ( checked.lte() ) {
+		checkAction( checked );
+		checked++;
+	}
+}
+
+void ParseData::makeExportsNameTree()
+{
+	/* The exports get their own name tree walk. */
+	initExportsNameWalk();
+
+	/* Recurse on every definition in the graph dictionary that is marked
+	 * as an export; everything else is skipped. */
+	for ( GraphDict::Iter def = graphDict; def.lte(); def++ ) {
+		if ( !def->value->isExport )
+			continue;
+		def->value->makeNameTree( def->loc, this );
+	}
+}
+
+/* Compute the export list: every exported definition must describe a single
+ * character machine, and that character's key is recorded under the
+ * definition's name. */
+void ParseData::makeExports()
+{
+	makeExportsNameTree();
+
+	/* Resolve name references in the exports tree. */
+	initExportsNameWalk();
+	for ( GraphDict::Iter def = graphDict; def.lte(); def++ ) {
+		if ( !def->value->isExport )
+			continue;
+		def->value->resolveNameRefs( this );
+	}
+
+	/* Build each exported definition and extract its key. */
+	initExportsNameWalk();
+	for ( GraphDict::Iter def = graphDict; def.lte(); def++ ) {
+		/* Only var defs flagged as exports are considered. */
+		if ( !def->value->isExport )
+			continue;
+
+		/* Build the graph from a walk of the parse tree. */
+		FsmAp *graph = def->value->walk( this );
+
+		if ( graph->checkSingleCharMachine() ) {
+			/* Safe to extract the key and declare the export. */
+			Key exportKey = graph->startState->outList.head->lowKey;
+			exportList.append( new Export( def->value->name, exportKey ) );
+		}
+		else {
+			/* Anything other than a single character cannot be exported. */
+			error(def->loc) << "bad export machine, must define "
+				"a single character" << endl;
+		}
+	}
+}
+
+/* Build and finalize the section graph so that it can be written out.
+ * With a null graphDictEl every instantiation is built (makeAll); otherwise
+ * only the given definition is built (makeSpecific). The result is stored in
+ * sectionGraph. If any error has been counted by the time the graph and the
+ * exports are made, the function returns before analysis and state
+ * numbering. */
+void ParseData::prepareMachineGen( GraphDictEl *graphDictEl )
+{
+ /* Point the global key ops and cond data at this section's instances
+ * before anything below uses them. */
+ beginProcessing();
+ initKeyOps();
+ makeRootNames();
+ initLongestMatchData();
+
+ /* Make the graph, do minimization. */
+ if ( graphDictEl == 0 )
+ sectionGraph = makeAll();
+ else
+ sectionGraph = makeSpecific( graphDictEl );
+
+ /* Compute exports from the export definitions. */
+ makeExports();
+
+ /* If any errors have occured in the input file then don't write anything. */
+ if ( gblErrorCount > 0 )
+ return;
+
+ analyzeGraph( sectionGraph );
+
+ /* Depends on the graph analysis. */
+ setLongestMatchData( sectionGraph );
+
+ /* Decide if an error state is necessary.
+ * 1. There is an error transition
+ * 2. There is a gap in the transitions
+ * 3. The longest match operator requires it.
+ * NOTE(review): only two conditions are tested below; case 2 is
+ * presumably covered by hasErrorTrans() - confirm. */
+ if ( lmRequiresErrorState || sectionGraph->hasErrorTrans() )
+ sectionGraph->errState = sectionGraph->addState();
+
+ /* State numbers need to be assigned such that all final states have a
+ * larger state id number than all non-final states. This enables the
+ * first_final mechanism to function correctly. We also want states to be
+ * ordered in a predictable fashion. So we first apply a depth-first
+ * search, then do a stable sort by final state status, then assign
+ * numbers. */
+
+ sectionGraph->depthFirstOrdering();
+ sectionGraph->sortStatesByFinal();
+ sectionGraph->setStateNumbers( 0 );
+}
+
+/* Write the XML form of this section's graph to out. When printStatistics is
+ * set, the section name and state count are also reported on cerr. */
+void ParseData::generateXML( ostream &out )
+{
+	beginProcessing();
+
+	/* The generator writes sectionGraph under the section's name. */
+	XMLCodeGen generator( sectionName, this, sectionGraph, out );
+	generator.writeXML();
+
+	if ( !printStatistics )
+		return;
+
+	cerr << "fsm name : " << sectionName << endl;
+	cerr << "num states: " << sectionGraph->stateList.length() << endl;
+	cerr << endl;
+}
+
+/* Send eof to all parsers. */
+void terminateAllParsers( )
+{
+	/* FIXME: a proper token is needed here. Suppose we should use the
+	 * location of EOF in the last file that the parser was referenced in. */
+	InputLoc eofLoc;
+	eofLoc.line = 0;
+	eofLoc.col = 0;
+	eofLoc.fileName = "<EOF>";
+
+	/* Every registered parser gets the eof token with no token data. */
+	ParserDict::Iter entry = parserDict;
+	while ( entry.lte() ) {
+		entry->value->token( eofLoc, _eof, 0, 0 );
+		entry++;
+	}
+}
+
+/* Write the lang="..." attribute for the current host language. A host
+ * language type outside the four known ones yields an empty value. */
+void writeLanguage( std::ostream &out )
+{
+	const char *langName = 0;
+	switch ( hostLangType ) {
+		case CCode: langName = "C"; break;
+		case DCode: langName = "D"; break;
+		case JavaCode: langName = "Java"; break;
+		case RubyCode: langName = "Ruby"; break;
+	}
+
+	out << " lang=\"";
+	if ( langName != 0 )
+		out << langName;
+	out << "\"";
+}
+
+/* Write the opening <ragel> tag, carrying the input file name and the host
+ * language attribute. */
+static void writeRagelOpenTag( std::ostream &out, const char *inputFileName )
+{
+	out << "<ragel filename=\"" << inputFileName << "\"";
+	writeLanguage( out );
+	out << ">\n";
+}
+
+/* Write the collected host data followed by the closing </ragel> tag. */
+static void writeRagelCloseTag( std::ostream &out, const std::string &hostData )
+{
+	out << hostData;
+	out << "</ragel>\n";
+}
+
+/* Prepare and write machines as XML. With neither machineSpec nor
+ * machineName set, every section that has instantiations is written.
+ * Otherwise the first section matching machineSpec and containing
+ * machineName (each only checked if set) is written. Nothing is written
+ * once an error has been counted. */
+void writeMachines( std::ostream &out, std::string hostData, const char *inputFileName )
+{
+	if ( machineSpec == 0 && machineName == 0 ) {
+		/* No machine spec or machine name given. Prepare everything first so
+		 * that an error in any section suppresses all output. */
+		for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
+			ParseData *section = parser->value->pd;
+			if ( section->instanceList.length() > 0 )
+				section->prepareMachineGen( 0 );
+		}
+
+		if ( gblErrorCount != 0 )
+			return;
+
+		writeRagelOpenTag( out, inputFileName );
+		for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
+			ParseData *section = parser->value->pd;
+			if ( section->instanceList.length() > 0 )
+				section->generateXML( out );
+		}
+		writeRagelCloseTag( out, hostData );
+		return;
+	}
+
+	/* There is either a machine spec or machine name given. Without any
+	 * parser there is nothing to search. */
+	if ( parserDict.length() <= 0 )
+		return;
+
+	ParseData *parseData = 0;
+	GraphDictEl *graphDictEl = 0;
+
+	/* Traverse the sections and stop at the first section/machine that
+	 * matches the -M/-S options. */
+	for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
+		ParseData *candidate = parser->value->pd;
+
+		/* A given machine spec must equal the section name. */
+		if ( machineSpec != 0 && strcmp( candidate->sectionName, machineSpec ) != 0 )
+			continue;
+
+		/* A given machine name must exist in the section's graph dict. */
+		GraphDictEl *entry = 0;
+		if ( machineName != 0 ) {
+			entry = candidate->graphDict.find( machineName );
+			if ( entry == 0 )
+				continue;
+		}
+
+		parseData = candidate;
+		graphDictEl = entry;
+		break;
+	}
+
+	if ( parseData == 0 ) {
+		error() << "could not locate machine specified with -S and/or -M" << endl;
+		return;
+	}
+
+	/* Section/Machine to emit was found. Prepare and emit it. */
+	parseData->prepareMachineGen( graphDictEl );
+	if ( gblErrorCount != 0 )
+		return;
+
+	writeRagelOpenTag( out, inputFileName );
+	parseData->generateXML( out );
+	writeRagelCloseTag( out, hostData );
+}
diff --git a/contrib/tools/ragel5/ragel/parsedata.h b/contrib/tools/ragel5/ragel/parsedata.h
new file mode 100644
index 0000000000..2baa7373d2
--- /dev/null
+++ b/contrib/tools/ragel5/ragel/parsedata.h
@@ -0,0 +1,401 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _PARSEDATA_H
+#define _PARSEDATA_H
+
+#include <iostream>
+#include <limits.h>
+#include "avlmap.h"
+#include "bstmap.h"
+#include "vector.h"
+#include "dlist.h"
+#include "fsmgraph.h"
+#include "compare.h"
+#include "vector.h"
+#include "common.h"
+#include "parsetree.h"
+
+/* Forwards. */
+using std::ostream;
+
+struct VarDef;
+struct Join;
+struct Expression;
+struct Term;
+struct FactorWithAug;
+struct FactorWithLabel;
+struct FactorWithRep;
+struct FactorWithNeg;
+struct Factor;
+struct Literal;
+struct Range;
+struct RegExpr;
+struct ReItem;
+struct ReOrBlock;
+struct ReOrItem;
+struct LongestMatch;
+typedef DList<LongestMatch> LmList;
+
+/* Graph dictionary element. Associates a definition name (key) with its
+ * VarDef (value). It derives from both AvlTreeEl and DListEl, so the same
+ * element type serves the GraphDict tree and the GraphList list declared
+ * below. */
+struct GraphDictEl
+:
+ public AvlTreeEl<GraphDictEl>,
+ public DListEl<GraphDictEl>
+{
+ /* Key-only element, with no definition attached. */
+ GraphDictEl(const char *k )
+ : key(k), value(0), isInstance(false) { }
+ /* Element for the definition 'value' named 'k'. */
+ GraphDictEl(const char *k, VarDef *value )
+ : key(k), value(value), isInstance(false) { }
+
+ const char *getKey() { return key; }
+
+ /* The key pointer is stored as given; the string is not copied. */
+ const char *key;
+ VarDef *value;
+ /* Both constructors start this out as false. */
+ bool isInstance;
+
+ /* Location info of graph definition. Points to variable name of assignment. */
+ InputLoc loc;
+};
+
+typedef AvlTree<GraphDictEl, char*, CmpStr> GraphDict;
+typedef DList<GraphDictEl> GraphList;
+
+/* Priority name dictionary. */
+typedef AvlMapEl<char*, int> PriorDictEl;
+typedef AvlMap<char*, int, CmpStr> PriorDict;
+
+/* Local error name dictionary. */
+typedef AvlMapEl<const char*, int> LocalErrDictEl;
+typedef AvlMap<const char*, int, CmpStr> LocalErrDict;
+
+/* Tree of instantiated names. */
+typedef BstMapEl<const char*, NameInst*> NameMapEl;
+typedef BstMap<const char*, NameInst*, CmpStr> NameMap;
+typedef Vector<NameInst*> NameVect;
+typedef BstSet<NameInst*> NameSet;
+
+/* Node in the tree of instantiated names. */
+struct NameInst
+{
+ /* Creates a node with zeroed reference/use counts and no start/final
+ * targets. Note that prev and next are not initialized here. */
+ NameInst( const InputLoc &loc, NameInst *parent, const char *name, int id, bool isLabel ) :
+ loc(loc), parent(parent), name(name), id(id), isLabel(isLabel),
+ isLongestMatch(false), numRefs(0), numUses(0), start(0), final(0) {}
+
+ InputLoc loc;
+
+ /* Keep parent pointers in the name tree to retrieve
+ * fully qualified names. */
+ NameInst *parent;
+
+ /* Null for anonymous instances. */
+ const char *name;
+ int id;
+ bool isLabel;
+ bool isLongestMatch;
+
+ int numRefs;
+ int numUses;
+
+ /* Names underneath us, excludes anonymous names. */
+ NameMap children;
+
+ /* All names underneath us in order of appearance. */
+ NameVect childVect;
+
+ /* Join scopes need an implicit "final" target. */
+ NameInst *start, *final;
+
+ /* During a fsm generation walk, lists the names that are referenced by
+ * epsilon operations in the current scope. After the link is made by the
+ * epsilon reference and the join operation is complete, the label can
+ * have its refcount decremented. Once there are no more references the
+ * entry point can be removed from the fsm returned. */
+ NameVect referencedNames;
+
+ /* Pointers for the name search queue. */
+ NameInst *prev, *next;
+
+ /* Check if this name inst or any name inst below is referenced. */
+ bool anyRefsRec();
+};
+
+typedef DList<NameInst> NameInstList;
+
+/* Stack frame used in walking the name tree. Returned by
+ * ParseData::enterNameScope() and handed back to popNameScope() or
+ * resetNameScope().
+ * NOTE(review): the fields presumably hold the previous values of
+ * curNameInst, curNameChild and localNameScope - confirm. */
+struct NameFrame
+{
+ NameInst *prevNameInst;
+ int prevNameChild;
+ NameInst *prevLocalScope;
+};
+
+/* Class to collect information about the machine during the
+ * parse of input. It also carries the operations that turn the collected
+ * parse data into a graph and write it out as XML. */
+struct ParseData
+{
+ /* Create a new parse data object. This is done at the beginning of every
+ * fsm specification. */
+ ParseData(const char *fileName, char *sectionName, const InputLoc &sectionLoc );
+ ~ParseData();
+
+ /*
+ * Setting up the graph dict.
+ */
+
+ /* Initialize a graph dict with the basic fsms. */
+ void initGraphDict();
+ void createBuiltin(const char *name, BuiltinMachine builtin );
+
+ /* Make a name id in the current name instantiation scope if it is not
+ * already there. */
+ NameInst *addNameInst( const InputLoc &loc, const char *data, bool isLabel );
+ void makeRootNames();
+ void makeNameTree( GraphDictEl *gdNode );
+ void makeExportsNameTree();
+ void fillNameIndex( NameInst *from );
+ void printNameTree();
+
+ /* Increments the usage count on entry names. Names that are no longer
+ * needed will have their entry points unset. */
+ void unsetObsoleteEntries( FsmAp *graph );
+
+ /* Resolve name references in action code and epsilon transitions. */
+ NameSet resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly );
+ void resolveFrom( NameSet &result, NameInst *refFrom,
+ const NameRef &nameRef, int namePos );
+ NameInst *resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action );
+ void resolveNameRefs( InlineList *inlineList, Action *action );
+ void resolveActionNameRefs();
+
+ /* Set the alphabet type. If the types are not valid returns false. */
+ bool setAlphType( char *s1, char *s2 );
+ bool setAlphType( char *s1 );
+
+ /* Unique actions. */
+ void removeDups( ActionTable &actionTable );
+ void removeActionDups( FsmAp *graph );
+
+ /* Dumping the name instantiation tree. */
+ void printNameInst( NameInst *nameInst, int level );
+
+ /* Make the graph from a graph dict node. Does minimization. */
+ FsmAp *makeInstance( GraphDictEl *gdNode );
+ FsmAp *makeSpecific( GraphDictEl *gdNode );
+ FsmAp *makeAll();
+
+ /* Checking the contents of actions. */
+ void checkAction( Action *action );
+ void checkInlineList( Action *act, InlineList *inlineList );
+
+ void analyzeAction( Action *action, InlineList *inlineList );
+ void analyzeGraph( FsmAp *graph );
+ void makeExports();
+
+ /* Building the section graph and writing it out as XML. */
+ void prepareMachineGen( GraphDictEl *graphDictEl );
+ void generateXML( ostream &out );
+ FsmAp *sectionGraph;
+ bool generatingSectionSubset;
+
+ void initKeyOps();
+
+ /*
+ * Data collected during the parse.
+ */
+
+ /* Dictionary of graphs. Both instances and non-instances go here. */
+ GraphDict graphDict;
+
+ /* The list of instances. */
+ GraphList instanceList;
+
+ /* Dictionary of actions. Lets actions be defined and then referenced. */
+ ActionDict actionDict;
+
+ /* Dictionary of named priorities. */
+ PriorDict priorDict;
+
+ /* Dictionary of named local errors. */
+ LocalErrDict localErrDict;
+
+ /* List of actions. Will be pasted into a switch statement. */
+ ActionList actionList;
+
+ /* The id of the next priority name and label. */
+ int nextPriorKey, nextLocalErrKey, nextNameId, nextCondId;
+
+ /* The default priority number key for a machine. This is active during
+ * the parse of the rhs of a machine assignment. */
+ int curDefPriorKey;
+
+ int curDefLocalErrKey;
+
+ /* Alphabet type. */
+ HostType *userAlphType;
+ bool alphTypeSet;
+
+ /* Element type and get key expression. */
+ InlineList *getKeyExpr;
+ InlineList *accessExpr;
+ InlineList *curStateExpr;
+
+ /* The alphabet range. */
+ char *lowerNum, *upperNum;
+ Key lowKey, highKey;
+ InputLoc rangeLowLoc, rangeHighLoc;
+
+ /* The name of the file the fsm is from, and the spec name. */
+ const char *fileName;
+ char *sectionName;
+ InputLoc sectionLoc;
+
+ /* Number of errors encountered parsing the fsm spec. */
+ int errorCount;
+
+ /* Counting the action and priority ordering. */
+ int curActionOrd;
+ int curPriorOrd;
+
+ /* Root of the name tree. One root is for the instantiated machines. The
+ * other root is for exported definitions. */
+ NameInst *rootName;
+ NameInst *exportsRootName;
+
+ /* Name tree walking. */
+ NameInst *curNameInst;
+ int curNameChild;
+
+ /* The place where resolved epsilon transitions go. These cannot go into
+ * the parse tree because a single epsilon op can resolve more than once
+ * to different nameInsts if the machine it's in is used more than once. */
+ NameVect epsilonResolvedLinks;
+ int nextEpsilonResolvedLink;
+
+ /* Root of the name tree used for doing local name searches. */
+ NameInst *localNameScope;
+
+ void setLmInRetLoc( InlineList *inlineList );
+ void initLongestMatchData();
+ void setLongestMatchData( FsmAp *graph );
+ void initNameWalk();
+ void initExportsNameWalk();
+ /* The child of the current name inst at the current child index. */
+ NameInst *nextNameScope() { return curNameInst->childVect[curNameChild]; }
+ NameFrame enterNameScope( bool isLocal, int numScopes );
+ void popNameScope( const NameFrame &frame );
+ void resetNameScope( const NameFrame &frame );
+
+ /* Make name ids to name inst pointers. */
+ NameInst **nameIndex;
+
+ /* Counter for assigning ids to longest match items. */
+ int nextLongestMatchId;
+ bool lmRequiresErrorState;
+
+ /* List of all longest match parse tree items. */
+ LmList lmList;
+
+ Action *newAction(const char *name, InlineList *inlineList );
+
+ /* Actions created for longest match support, each with the ordering
+ * number it is inserted under. */
+ Action *initTokStart;
+ int initTokStartOrd;
+
+ Action *setTokStart;
+ int setTokStartOrd;
+
+ Action *initActId;
+ int initActIdOrd;
+
+ Action *setTokEnd;
+ int setTokEndOrd;
+
+ /* Point the global condData and keyOps at this section's instances. */
+ void beginProcessing()
+ {
+ ::condData = &thisCondData;
+ ::keyOps = &thisKeyOps;
+ }
+
+ CondData thisCondData;
+ KeyOps thisKeyOps;
+
+ ExportList exportList;
+};
+
+void afterOpMinimize( FsmAp *fsm, bool lastInSeq = true );
+Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd );
+Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd );
+Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd );
+Key makeFsmKeyChar( char c, ParseData *pd );
+void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd );
+void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
+ bool caseInsensitive, ParseData *pd );
+FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd );
+FsmAp *dotFsm( ParseData *pd );
+FsmAp *dotStarFsm( ParseData *pd );
+
+void errorStateLabels( const NameSet &locations );
+
+/* Data used by the parser specific to the current file. Supports the include
+ * system, since a new parser is executed for each included file. */
+struct InputData
+{
+ /* Starts with no parse data and no section, positioned at line 1, and
+ * marked active. The three strings are stored as given, not copied. */
+ InputData( char *fileName, char *includeSpec, char *includeTo ) :
+ pd(0), sectionName(0), defaultParseData(0),
+ first_line(1), first_column(1),
+ last_line(1), last_column(0),
+ fileName(fileName), includeSpec(includeSpec),
+ includeTo(includeTo), active(true)
+ {}
+
+ /* For collecting name references. */
+ NameRef nameRef;
+ NameRefList nameRefList;
+
+ /* The parse data. For each fsm spec, the parser collects things that it parses
+ * in data structures in here. */
+ ParseData *pd;
+
+ char *sectionName;
+ ParseData *defaultParseData;
+
+ int first_line;
+ int first_column;
+ int last_line;
+ int last_column;
+
+ char *fileName;
+
+ /* If this is an included file, this contains the specification to search
+ * for. IncludeTo will contain the spec name that does the including. */
+ char *includeSpec;
+ char *includeTo;
+
+ bool active;
+ InputLoc sectionLoc;
+};
+
+struct Parser;
+
+typedef AvlMap<char*, Parser *, CmpStr> ParserDict;
+typedef AvlMapEl<char*, Parser *> ParserDictEl;
+
+extern ParserDict parserDict;
+
+
+#endif /* _PARSEDATA_H */
diff --git a/contrib/tools/ragel5/ragel/parsetree.cpp b/contrib/tools/ragel5/ragel/parsetree.cpp
new file mode 100644
index 0000000000..4755e3085b
--- /dev/null
+++ b/contrib/tools/ragel5/ragel/parsetree.cpp
@@ -0,0 +1,2089 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <iostream>
+#include <iomanip>
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
+
+/* Parsing. */
+#include "ragel.h"
+#include "rlparse.h"
+#include "parsetree.h"
+
+using namespace std;
+ostream &operator<<( ostream &out, const NameRef &nameRef );
+ostream &operator<<( ostream &out, const NameInst &nameInst );
+
+/* Convert the literal string which comes in from the scanner into an array of
+ * characters with escapes and options interpreted. Also null terminates the
+ * string. Though this null termination should not be relied on for
+ * interpreting literals in the parser because the string may contain a
+ * literal string with \0 */
+void Token::prepareLitString( Token &result, bool &caseInsensitive )
+{
+    /* Every output character consumes at least one input character, so the
+     * token length plus one byte for the terminator is always enough. */
+    result.data = new char[this->length+1];
+    caseInsensitive = false;
+
+    /* Decoding begins just past the first character of the token. */
+    char *in = this->data + 1;
+
+    /* Walk backwards over the trailing option characters until a quote
+     * character is reached. Only the 'i' option is understood. */
+    char *closing = this->data + this->length - 1;
+    for ( ; *closing != '\'' && *closing != '\"'; closing -= 1 ) {
+        if ( *closing != 'i' ) {
+            error( this->loc ) << "literal string '" << *closing <<
+                    "' option not supported" << endl;
+        }
+        else {
+            caseInsensitive = true;
+        }
+    }
+
+    /* Copy everything up to the closing quote, translating escapes. */
+    int count = 0;
+    while ( in != closing ) {
+        if ( *in != '\\' ) {
+            /* Ordinary character, copied through unchanged. */
+            result.data[count++] = *in++;
+            continue;
+        }
+
+        /* A backslash always consumes the character that follows it. */
+        const char escaped = in[1];
+        in += 2;
+        switch ( escaped ) {
+            case '0': result.data[count++] = '\0'; break;
+            case 'a': result.data[count++] = '\a'; break;
+            case 'b': result.data[count++] = '\b'; break;
+            case 't': result.data[count++] = '\t'; break;
+            case 'n': result.data[count++] = '\n'; break;
+            case 'v': result.data[count++] = '\v'; break;
+            case 'f': result.data[count++] = '\f'; break;
+            case 'r': result.data[count++] = '\r'; break;
+            /* Backslash followed by a newline produces no output. */
+            case '\n': break;
+            /* Any other escaped character stands for itself. */
+            default: result.data[count++] = escaped; break;
+        }
+    }
+
+    /* Record the decoded length and null terminate. */
+    result.length = count;
+    result.data[count] = 0;
+}
+
+
+/* Build the machine for this variable definition. Walks the contained join
+ * or longest match inside a new name scope, transfers local error actions
+ * registered under this definition's name, and returns the resulting graph. */
+FsmAp *VarDef::walk( ParseData *pd )
+{
+ /* We enter into a new name scope. */
+ NameFrame nameFrame = pd->enterNameScope( true, 1 );
+
+ /* Recurse on the expression. */
+ FsmAp *rtnVal = joinOrLm->walk( pd );
+
+ /* Do the transfer of local error actions. */
+ LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name );
+ if ( localErrDictEl != 0 ) {
+ for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ )
+ rtnVal->transferErrorActions( state, localErrDictEl->value );
+ }
+
+ /* If the expression below is a join operation with multiple expressions
+ * then it just had epsilon transitions resolved. If it is a join
+ * with only a single expression then run the epsilon op now. */
+ if ( joinOrLm->type == JoinOrLm::JoinType && joinOrLm->join->exprList.length() == 1 )
+ rtnVal->epsilonOp();
+
+ /* We can now unset entry points that are no longer used. */
+ pd->unsetObsoleteEntries( rtnVal );
+
+ /* If the name of the variable is referenced then add the entry point to
+ * the graph. */
+ if ( pd->curNameInst->numRefs > 0 )
+ rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState );
+
+ /* Pop the name scope. */
+ pd->popNameScope( nameFrame );
+ return rtnVal;
+}
+
+void VarDef::makeNameTree( const InputLoc &loc, ParseData *pd )
+{
+    /* Remember the enclosing name instance so it can be restored below. */
+    NameInst *outerInst = pd->curNameInst;
+
+    /* A variable definition opens a scope carrying its own name. */
+    NameInst *ownInst = pd->addNameInst( loc, name, false );
+    pd->curNameInst = ownInst;
+
+    /* Flag scopes whose body is a longest match construct. */
+    if ( joinOrLm->type == JoinOrLm::LongestMatchType )
+        ownInst->isLongestMatch = true;
+
+    /* Descend into the body of the definition. */
+    joinOrLm->makeNameTree( pd );
+
+    /* Leave the scope again. */
+    pd->curNameInst = outerInst;
+}
+
+void VarDef::resolveNameRefs( ParseData *pd )
+{
+    /* Step into the scope belonging to this definition, keeping the frame
+     * needed to get back out. */
+    NameFrame savedFrame = pd->enterNameScope( true, 1 );
+
+    /* Resolve the references found in the body. */
+    joinOrLm->resolveNameRefs( pd );
+
+    /* Step back out to the enclosing scope. */
+    pd->popNameScope( savedFrame );
+}
+
+/* Location used for this part: the location of its action when it has one,
+ * otherwise the stored semiLoc. */
+InputLoc LongestMatchPart::getLoc()
+{
+    if ( action == 0 )
+        return semiLoc;
+    return action->loc;
+}
+
+/*
+ * If there are any LMs then all of the following entry points must reset
+ * tokstart:
+ *
+ * 1. fentry(StateRef)
+ * 2. fgoto(StateRef), fcall(StateRef), fnext(StateRef)
+ * 3. target of any transition that has an fcall (the return loc).
+ * 4. start state of all longest match routines.
+ */
+
+/* Create an action flagged as a longest-match action, taking the next
+ * condition id from the parse data. The action records the current name
+ * instance as a reference and is appended to the parse data's action list. */
+Action *LongestMatch::newAction( ParseData *pd, const InputLoc &loc,
+        const char *name, InlineList *inlineList )
+{
+    Action *lmAction = new Action( loc, name, inlineList, pd->nextCondId++ );
+    lmAction->isLmAction = true;
+
+    /* Register the reference and hand the action to the parse data. */
+    lmAction->actionRefs.append( pd->curNameInst );
+    pd->actionList.append( lmAction );
+    return lmAction;
+}
+
+/* Create the generated actions of a longest match: four per part (set act
+ * id, on-last, on-next, lag-behind) plus one shared select action. The
+ * passes run one after another so that all actions of one kind are created
+ * before any action of the next kind. */
+void LongestMatch::makeActions( ParseData *pd )
+{
+    /* Pass 1: "store<id>" actions that set the action id of a part. */
+    for ( LmPartList::Iter part = *longestMatchList; part.lte(); part++ ) {
+        InlineList *items = new InlineList;
+        items->append( new InlineItem( part->getLoc(), this, part, InlineItem::LmSetActId ) );
+        char *label = new char[50];
+        sprintf( label, "store%i", part->longestMatchId );
+        part->setActId = newAction( pd, part->getLoc(), label, items );
+    }
+
+    /* Pass 2: "imm<id>" actions that run the user action and restart on the
+     * last character. */
+    for ( LmPartList::Iter part = *longestMatchList; part.lte(); part++ ) {
+        InlineList *items = new InlineList;
+        items->append( new InlineItem( part->getLoc(), this, part,
+                InlineItem::LmOnLast ) );
+        char *label = new char[50];
+        sprintf( label, "imm%i", part->longestMatchId );
+        part->actOnLast = newAction( pd, part->getLoc(), label, items );
+    }
+
+    /* Pass 3: "lagh<id>" actions that run the user action and restart on
+     * the next character. These actions will set tokend themselves (it is
+     * the current char). */
+    for ( LmPartList::Iter part = *longestMatchList; part.lte(); part++ ) {
+        InlineList *items = new InlineList;
+        items->append( new InlineItem( part->getLoc(), this, part,
+                InlineItem::LmOnNext ) );
+        char *label = new char[50];
+        sprintf( label, "lagh%i", part->longestMatchId );
+        part->actOnNext = newAction( pd, part->getLoc(), label, items );
+    }
+
+    /* Pass 4: "lag<id>" actions that run the user action and restart at
+     * tokend. These execute some time after matching the last char. */
+    for ( LmPartList::Iter part = *longestMatchList; part.lte(); part++ ) {
+        InlineList *items = new InlineList;
+        items->append( new InlineItem( part->getLoc(), this, part,
+                InlineItem::LmOnLagBehind ) );
+        char *label = new char[50];
+        sprintf( label, "lag%i", part->longestMatchId );
+        part->actLagBehind = newAction( pd, part->getLoc(), label, items );
+    }
+
+    /* The select action is not tied to any part; it gets a fixed location
+     * of line 1, column 1. */
+    InputLoc origin;
+    origin.line = 1;
+    origin.col = 1;
+
+    /* Build the "lagsel" action around a single LmSwitch item. */
+    InlineList *selectItems = new InlineList;
+    selectItems->append( new InlineItem( origin, this, 0, InlineItem::LmSwitch ) );
+    lmActSelect = newAction( pd, origin, "lagsel", selectItems );
+}
+
+/* Take the name of the nearest named name instance, starting at the current
+ * one and climbing through the parents. */
+void LongestMatch::findName( ParseData *pd )
+{
+    NameInst *scope;
+    for ( scope = pd->curNameInst; scope->name == 0; ) {
+        /* Every machine must have a name, so a named ancestor is expected
+         * to exist before the parent chain runs out. */
+        NameInst *up = scope->parent;
+        assert( up != 0 );
+        scope = up;
+    }
+    name = scope->name;
+}
+
+void LongestMatch::makeNameTree( ParseData *pd )
+{
+    /* Keep the enclosing name instance so it can be restored at the end. */
+    NameInst *outerInst = pd->curNameInst;
+
+    /* The longest match lives in an anonymous scope of its own. Will be
+     * used for restarting the machine after matching a token. */
+    pd->curNameInst = pd->addNameInst( loc, 0, false );
+
+    /* Build the name tree of every part's join. */
+    LmPartList::Iter part = *longestMatchList;
+    while ( part.lte() ) {
+        part->join->makeNameTree( pd );
+        part++;
+    }
+
+    /* Climb the name tree to pick up a name for this longest match. */
+    findName( pd );
+
+    /* The generated actions are created here as well. */
+    makeActions( pd );
+
+    /* Leave the anonymous scope. */
+    pd->curNameInst = outerInst;
+}
+
+void LongestMatch::resolveNameRefs( ParseData *pd )
+{
+    /* Step into the scope that belongs to the longest match. */
+    NameFrame savedFrame = pd->enterNameScope( true, 1 );
+
+    LmPartList::Iter part = *longestMatchList;
+    while ( part.lte() ) {
+        /* A part with a user action records the local name scope as a
+         * reference to that action. */
+        if ( part->action != 0 )
+            part->action->actionRefs.append( pd->localNameScope );
+
+        /* Resolve the references inside the part's join. */
+        part->join->resolveNameRefs( pd );
+        part++;
+    }
+
+    /* Step back out to the enclosing scope. */
+    pd->popNameScope( savedFrame );
+}
+
+/* Redirect a transition to the start state of the graph: detach it from its
+ * current target, then attach it again from the same source state. */
+void LongestMatch::restart( FsmAp *graph, TransAp *trans )
+{
+    StateAp *source = trans->fromState;
+    StateAp *oldTarget = trans->toState;
+
+    graph->detachTrans( source, oldTarget, trans );
+    graph->attachTrans( source, graph->startState, trans );
+}
+
+/* Turn the union of the longest-match parts in graph into a restarting
+ * scanner. Fills the per-state lmItemSet with the parts that may have
+ * matched, embeds the tokstart/tokend/act bookkeeping actions, redirects
+ * transitions that complete a token immediately back to the start state,
+ * installs error transitions to the start state that run the matched part's
+ * action (or the select action) and finally makes the start state final.
+ * Every marking pass clears SB_ISMARKED again before the next one runs. */
+void LongestMatch::runLonestMatch( ParseData *pd, FsmAp *graph )
+{
+ /* States reachable from the start state get the null item, meaning that
+ * no part has matched there yet. */
+ graph->markReachableFromHereStopFinal( graph->startState );
+ for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
+ if ( ms->stateBits & SB_ISMARKED ) {
+ ms->lmItemSet.insert( 0 );
+ ms->stateBits &= ~ SB_ISMARKED;
+ }
+ }
+
+ /* Transfer the first item of non-empty lmAction tables to the item sets
+ * of the states that follow. Exclude states that have no transitions out.
+ * This must happen on a separate pass so that on each iteration of the
+ * next pass we have the item set entries from all lmAction tables. */
+ for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+ for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
+ if ( trans->lmActionTable.length() > 0 ) {
+ LmActionTableEl *lmAct = trans->lmActionTable.data;
+ StateAp *toState = trans->toState;
+ assert( toState );
+
+ /* Check if there are transitions out, this may be a very
+ * close approximation? Out transitions going nowhere?
+ * FIXME: Check. */
+ if ( toState->outList.length() > 0 ) {
+ /* Fill the item sets. */
+ graph->markReachableFromHereStopFinal( toState );
+ for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
+ if ( ms->stateBits & SB_ISMARKED ) {
+ ms->lmItemSet.insert( lmAct->value );
+ ms->stateBits &= ~ SB_ISMARKED;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /* The lmItem sets are now filled, telling us which longest match rules
+ * can succeed in which states. First determine if we need to make sure
+ * act is defaulted to zero. We need to do this if there are any states
+ * with lmItemSet.length() > 1 and NULL is included. That is, that the
+ * switch may get called when in fact nothing has been matched. */
+ int maxItemSetLength = 0;
+ graph->markReachableFromHereStopFinal( graph->startState );
+ for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
+ if ( ms->stateBits & SB_ISMARKED ) {
+ if ( ms->lmItemSet.length() > maxItemSetLength )
+ maxItemSetLength = ms->lmItemSet.length();
+ ms->stateBits &= ~ SB_ISMARKED;
+ }
+ }
+
+ /* The actions executed on starting to match a token. */
+ graph->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart );
+ graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart );
+ if ( maxItemSetLength > 1 ) {
+ /* The longest match action switch may be called when tokens are
+ * matched, in which case act must be initialized, there must be a
+ * case to handle the error, and the generated machine will require an
+ * error state. */
+ lmSwitchHandlesError = true;
+ pd->lmRequiresErrorState = true;
+ graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId );
+ }
+
+ /* The place to store transitions to restart. It may be possible for the
+ * restarting to affect the searching through the graph that follows. For
+ * now take the safe route and save the list of transitions to restart
+ * until after all searching is done. */
+ Vector<TransAp*> restartTrans;
+
+ /* Set actions that do immediate token recognition, set the longest match part
+ * id and set the token ending. */
+ for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+ for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
+ if ( trans->lmActionTable.length() > 0 ) {
+ LmActionTableEl *lmAct = trans->lmActionTable.data;
+ StateAp *toState = trans->toState;
+ assert( toState );
+
+ /* Check if there are transitions out, this may be a very
+ * close approximation? Out transitions going nowhere?
+ * FIXME: Check. */
+ if ( toState->outList.length() == 0 ) {
+ /* Can execute the immediate action for the longest match
+ * part. Redirect the action to the start state. */
+ trans->actionTable.setAction( lmAct->key,
+ lmAct->value->actOnLast );
+ restartTrans.append( trans );
+ }
+ else {
+ /* Look for non final states that have a non-empty item
+ * set. If these are present then we need to record the
+ * end of the token. Also find the highest item set
+ * length reachable from here (excluding at transitions to
+ * final states). Note that this reuses maxItemSetLength. */
+ bool nonFinalNonEmptyItemSet = false;
+ maxItemSetLength = 0;
+ graph->markReachableFromHereStopFinal( toState );
+ for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
+ if ( ms->stateBits & SB_ISMARKED ) {
+ if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
+ nonFinalNonEmptyItemSet = true;
+ if ( ms->lmItemSet.length() > maxItemSetLength )
+ maxItemSetLength = ms->lmItemSet.length();
+ ms->stateBits &= ~ SB_ISMARKED;
+ }
+ }
+
+ /* If there are reachable states that are not final and
+ * have non empty item sets or that have an item set
+ * length greater than one then we need to set tokend
+ * because the error action that matches the token will
+ * require it. */
+ if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 )
+ trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd );
+
+ /* Some states may not know which longest match item to
+ * execute, must set it. */
+ if ( maxItemSetLength > 1 ) {
+ /* There are transitions out, another match may come. */
+ trans->actionTable.setAction( lmAct->key,
+ lmAct->value->setActId );
+ }
+ }
+ }
+ }
+ }
+
+ /* Now that all graph searching is done it is certainly safe to do the
+ * restarting. It may be safe above, however this must be verified. */
+ for ( Vector<TransAp*>::Iter pt = restartTrans; pt.lte(); pt++ )
+ restart( graph, *pt );
+
+ /* All error transitions installed below share a single ordering value. */
+ int lmErrActionOrd = pd->curActionOrd++;
+
+ /* Embed the error for recognizing a char. */
+ for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+ if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) {
+ if ( st->isFinState() ) {
+ /* On error execute the actOnNext action, which knows that
+ * the last character of the token was one back and restart. */
+ graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
+ &st->lmItemSet[0]->actOnNext, 1 );
+ }
+ else {
+ /* Not a final state: use the part's lag-behind action. */
+ graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
+ &st->lmItemSet[0]->actLagBehind, 1 );
+ }
+ }
+ else if ( st->lmItemSet.length() > 1 ) {
+ /* Need to use the select. Take note of which items the select
+ * is needed for so only the necessary actions are included. */
+ for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
+ if ( *plmi != 0 )
+ (*plmi)->inLmSelect = true;
+ }
+ /* On error, execute the action select and go to the start state. */
+ graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
+ &lmActSelect, 1 );
+ }
+ }
+
+ /* Finally, the start state should be made final. */
+ graph->setFinState( graph->startState );
+}
+
+/* Build the machine for a longest match: one machine per part, all unioned
+ * together and then processed by runLonestMatch. */
+FsmAp *LongestMatch::walk( ParseData *pd )
+{
+    /* The longest match has its own name scope. */
+    NameFrame savedFrame = pd->enterNameScope( true, 1 );
+
+    /* Build each part and embed the longest match action for that part,
+     * using a fresh action ordering per part. */
+    const int numParts = longestMatchList->length();
+    FsmAp **machines = new FsmAp*[numParts];
+    int slot = 0;
+    for ( LmPartList::Iter part = *longestMatchList; part.lte(); part++ ) {
+        FsmAp *machine = part->join->walk( pd );
+        machine->longMatchAction( pd->curActionOrd++, part );
+        machines[slot++] = machine;
+    }
+
+    /* Fold the remaining machines into the first one, minimizing after each
+     * union. The grammar dictates that there is always at least one part. */
+    FsmAp *combined = machines[0];
+    for ( int next = 1; next < numParts; next++ ) {
+        combined->unionOp( machines[next] );
+        afterOpMinimize( combined );
+    }
+
+    runLonestMatch( pd, combined );
+
+    /* Leave the name scope and drop the temporary array. */
+    pd->popNameScope( savedFrame );
+    delete[] machines;
+    return combined;
+}
+
+/* Walk whichever construct this node holds. Yields a null pointer when the
+ * type is neither a join nor a longest match. */
+FsmAp *JoinOrLm::walk( ParseData *pd )
+{
+    if ( type == JoinType )
+        return join->walk( pd );
+    if ( type == LongestMatchType )
+        return longestMatch->walk( pd );
+    return 0;
+}
+
+/* Forward name tree construction to the join or the longest match held by
+ * this node. */
+void JoinOrLm::makeNameTree( ParseData *pd )
+{
+    if ( type == JoinType )
+        join->makeNameTree( pd );
+    else if ( type == LongestMatchType )
+        longestMatch->makeNameTree( pd );
+}
+
+/* Forward name reference resolution to the join or the longest match held
+ * by this node. */
+void JoinOrLm::resolveNameRefs( ParseData *pd )
+{
+    if ( type == JoinType )
+        join->resolveNameRefs( pd );
+    else if ( type == LongestMatchType )
+        longestMatch->resolveNameRefs( pd );
+}
+
+
+/* Construct with a location and the first expression. */
+Join::Join( const InputLoc &loc, Expression *expr )
+:
+ loc(loc)
+{
+ /* The expression list starts out holding just this expression. */
+ exprList.append( expr );
+}
+
+/* Construct with only the first expression. This overload receives no
+ * location, so the member is value-initialized. The previous initializer
+ * loc(loc) named the member itself (there is no parameter called loc) and
+ * therefore copied the member from its own indeterminate contents. */
+Join::Join( Expression *expr )
+:
+    loc()
+{
+    /* The expression list starts out holding just this expression. */
+    exprList.append( expr );
+}
+
+/* Walk a join node. A single expression is walked directly; two or more
+ * expressions go through the join operation. */
+FsmAp *Join::walk( ParseData *pd )
+{
+    if ( exprList.length() <= 1 )
+        return exprList.head->walk( pd );
+    return walkJoin( pd );
+}
+
+/* Build every expression of the list and join them into one machine. */
+FsmAp *Join::walkJoin( ParseData *pd )
+{
+    /* The join has a name scope of its own. */
+    NameFrame savedFrame = pd->enterNameScope( true, 1 );
+
+    /* Build one machine per expression, in list order. */
+    const int count = exprList.length();
+    FsmAp **machines = new FsmAp*[count];
+    int slot = 0;
+    for ( ExprList::Iter expr = exprList; slot < count; expr++ )
+        machines[slot++] = expr->walk( pd );
+
+    /* Fetch the start and final names of the scope. Final is guaranteed to
+     * exist, start is not. */
+    NameInst *startName = pd->curNameInst->start;
+    NameInst *finalName = pd->curNameInst->final;
+
+    /* An id of -1 stands for "no start name". */
+    int startId = -1;
+    if ( startName != 0 ) {
+        /* Record the implicit link to the start machine. */
+        pd->localNameScope->referencedNames.append( startName );
+        startId = startName->id;
+    }
+
+    /* A final id of -1 indicates that no epsilon references the final
+     * state, so none is created and no entry point is set for it. */
+    const int finalId = finalName->numRefs > 0 ? finalName->id : -1;
+
+    /* Join machines 1 and up onto machine 0. */
+    FsmAp *joined = machines[0];
+    joined->joinOp( startId, finalId, machines+1, count-1 );
+
+    /* Entry points that are no longer used can be unset now. */
+    pd->unsetObsoleteEntries( joined );
+
+    /* Leave the name scope and drop the temporary array. */
+    pd->popNameScope( savedFrame );
+    delete[] machines;
+    return joined;
+}
+
+void Join::makeNameTree( ParseData *pd )
+{
+    /* Without a real join there is no scope to create; just descend into
+     * the single expression. */
+    if ( exprList.length() <= 1 ) {
+        exprList.head->makeNameTree( pd );
+        return;
+    }
+
+    /* Open a new anonymous scope, remembering the enclosing one. */
+    NameInst *outerInst = pd->curNameInst;
+    NameInst *joinInst = pd->addNameInst( loc, 0, false );
+    pd->curNameInst = joinInst;
+
+    /* Join scopes need an implicit "final" target. */
+    joinInst->final = new NameInst( InputLoc(), joinInst, "final",
+            pd->nextNameId++, false );
+
+    /* Descend into every expression of the list. */
+    ExprList::Iter expr = exprList;
+    while ( expr.lte() ) {
+        expr->makeNameTree( pd );
+        expr++;
+    }
+
+    /* Leave the anonymous scope. */
+    pd->curNameInst = outerInst;
+}
+
+
+void Join::resolveNameRefs( ParseData *pd )
+{
+    /* A single expression is not a join: resolve it directly. */
+    if ( exprList.length() <= 1 ) {
+        exprList.head->resolveNameRefs( pd );
+        return;
+    }
+
+    /* The join has a name scope of its own. */
+    NameFrame savedFrame = pd->enterNameScope( true, 1 );
+
+    /* Look for the start label the join scope must contain. */
+    NameSet startLabels = pd->resolvePart( pd->localNameScope, "start", true );
+    if ( startLabels.length() > 0 ) {
+        /* The first one found is used. */
+        pd->curNameInst->start = startLabels[0];
+        if ( startLabels.length() > 1 ) {
+            /* More than one is reported, together with the labels. */
+            error(loc) << "multiple start labels" << endl;
+            errorStateLabels( startLabels );
+        }
+    }
+
+    if ( pd->curNameInst->start == 0 ) {
+        /* No start label. Report it and carry on without one. */
+        error(loc) << "no start label" << endl;
+    }
+    else {
+        /* The join refers to its start name implicitly. */
+        pd->curNameInst->start->numRefs += 1;
+    }
+
+    /* Resolve the references of every expression in the list. */
+    ExprList::Iter expr = exprList;
+    while ( expr.lte() ) {
+        expr->resolveNameRefs( pd );
+        expr++;
+    }
+
+    /* Leave the name scope. */
+    pd->popNameScope( savedFrame );
+}
+
+/* Release the children of an expression node. Binary nodes own both an
+ * expression and a term, term nodes own only a term and builtin nodes own
+ * neither. */
+Expression::~Expression()
+{
+    const bool isBinary = type == OrType || type == IntersectType ||
+            type == SubtractType || type == StrongSubtractType;
+
+    if ( isBinary )
+        delete expression;
+    if ( isBinary || type == TermType )
+        delete term;
+}
+
+/* Evaluate a single expression node and return the machine built for it.
+ * Binary nodes evaluate the left-hand expression first, then the term,
+ * combine the two and call afterOpMinimize with lastInSeq. Term nodes
+ * return the term's machine and builtin nodes a newly made builtin. */
+FsmAp *Expression::walk( ParseData *pd, bool lastInSeq )
+{
+ FsmAp *rtnVal = 0;
+ switch ( type ) {
+ case OrType: {
+ /* Evaluate the expression. Only the union case passes false for
+ * lastInSeq here; the other cases rely on the default argument. */
+ rtnVal = expression->walk( pd, false );
+ /* Evaluate the term. */
+ FsmAp *rhs = term->walk( pd );
+ /* Perform union. */
+ rtnVal->unionOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case IntersectType: {
+ /* Evaluate the expression. */
+ rtnVal = expression->walk( pd );
+ /* Evaluate the term. */
+ FsmAp *rhs = term->walk( pd );
+ /* Perform intersection. */
+ rtnVal->intersectOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case SubtractType: {
+ /* Evaluate the expression. */
+ rtnVal = expression->walk( pd );
+ /* Evaluate the term. */
+ FsmAp *rhs = term->walk( pd );
+ /* Perform subtraction. */
+ rtnVal->subtractOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case StrongSubtractType: {
+ /* Evaluate the expression. */
+ rtnVal = expression->walk( pd );
+
+ /* Evaluate the term and pad it with any* machines on both sides. */
+ FsmAp *rhs = dotStarFsm( pd );
+ FsmAp *termFsm = term->walk( pd );
+ FsmAp *trailAnyStar = dotStarFsm( pd );
+ rhs->concatOp( termFsm );
+ rhs->concatOp( trailAnyStar );
+
+ /* Perform subtraction. */
+ rtnVal->subtractOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case TermType: {
+ /* Return result of the term. */
+ rtnVal = term->walk( pd );
+ break;
+ }
+ case BuiltinType: {
+ /* Duplicate the builtin. */
+ rtnVal = makeBuiltin( builtin, pd );
+ break;
+ }
+ }
+
+ return rtnVal;
+}
+
+/* Build the name tree below an expression node: the left-hand expression
+ * first (binary nodes only), then the term. Builtin nodes have no children
+ * to visit. */
+void Expression::makeNameTree( ParseData *pd )
+{
+    const bool isBinary = type == OrType || type == IntersectType ||
+            type == SubtractType || type == StrongSubtractType;
+
+    if ( isBinary )
+        expression->makeNameTree( pd );
+    if ( isBinary || type == TermType )
+        term->makeNameTree( pd );
+}
+
+/* Resolve name references below an expression node: the left-hand
+ * expression first (binary nodes only), then the term. Builtin nodes have
+ * no children to visit. */
+void Expression::resolveNameRefs( ParseData *pd )
+{
+    const bool isBinary = type == OrType || type == IntersectType ||
+            type == SubtractType || type == StrongSubtractType;
+
+    if ( isBinary )
+        expression->resolveNameRefs( pd );
+    if ( isBinary || type == TermType )
+        term->resolveNameRefs( pd );
+}
+
+/* Release the children of a term node. The concatenation-style nodes own a
+ * term and a factor with augmentation; a plain factor node owns only the
+ * latter. */
+Term::~Term()
+{
+    const bool isBinary = type == ConcatType || type == RightStartType ||
+            type == RightFinishType || type == LeftType;
+
+    if ( isBinary )
+        delete term;
+    if ( isBinary || type == FactorWithAugType )
+        delete factorWithAug;
+}
+
+/* Evaluate a term node and return the machine built for it. The binary
+ * cases build the left term, then the right factor, optionally assign
+ * priorities to both sides under one shared key, concatenate and call
+ * afterOpMinimize with lastInSeq. The plain factor case returns the
+ * factor's machine unchanged. */
+FsmAp *Term::walk( ParseData *pd, bool lastInSeq )
+{
+ FsmAp *rtnVal = 0;
+ switch ( type ) {
+ case ConcatType: {
+ /* Evaluate the Term. Only plain concatenation passes false for
+ * lastInSeq here; the other cases rely on the default argument. */
+ rtnVal = term->walk( pd, false );
+ /* Evaluate the FactorWithAug. */
+ FsmAp *rhs = factorWithAug->walk( pd );
+ /* Perform concatenation. */
+ rtnVal->concatOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case RightStartType: {
+ /* Evaluate the Term. */
+ rtnVal = term->walk( pd );
+
+ /* Evaluate the FactorWithAug. */
+ FsmAp *rhs = factorWithAug->walk( pd );
+
+ /* Set up the priority descriptors. The left machine gets the
+ * lower priority whereas the right gets the higher start priority. */
+ priorDescs[0].key = pd->nextPriorKey++;
+ priorDescs[0].priority = 0;
+ rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
+
+ /* The start transitions of the right machine get the higher
+ * priority. Use the same unique key. */
+ priorDescs[1].key = priorDescs[0].key;
+ priorDescs[1].priority = 1;
+ rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
+
+ /* Perform concatenation. */
+ rtnVal->concatOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case RightFinishType: {
+ /* Evaluate the Term. */
+ rtnVal = term->walk( pd );
+
+ /* Evaluate the FactorWithAug. */
+ FsmAp *rhs = factorWithAug->walk( pd );
+
+ /* Set up the priority descriptors. The left machine gets the
+ * lower priority whereas the finishing transitions to the right
+ * get the higher priority. */
+ priorDescs[0].key = pd->nextPriorKey++;
+ priorDescs[0].priority = 0;
+ rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
+
+ /* The finishing transitions of the right machine get the higher
+ * priority. Use the same unique key. */
+ priorDescs[1].key = priorDescs[0].key;
+ priorDescs[1].priority = 1;
+ rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
+
+ /* Perform concatenation. */
+ rtnVal->concatOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case LeftType: {
+ /* Evaluate the Term. */
+ rtnVal = term->walk( pd );
+
+ /* Evaluate the FactorWithAug. */
+ FsmAp *rhs = factorWithAug->walk( pd );
+
+ /* Set up the priority descriptors. The left machine gets the
+ * higher priority. */
+ priorDescs[0].key = pd->nextPriorKey++;
+ priorDescs[0].priority = 1;
+ rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
+
+ /* The right machine gets the lower priority. Since
+ * startTransPrior might unnecessarily increase the number of
+ * states during the state machine construction process (due to
+ * isolation), we use allTransPrior instead, which has the same
+ * effect. */
+ priorDescs[1].key = priorDescs[0].key;
+ priorDescs[1].priority = 0;
+ rhs->allTransPrior( pd->curPriorOrd++, &priorDescs[1] );
+
+ /* Perform concatenation. */
+ rtnVal->concatOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case FactorWithAugType: {
+ /* Nothing to combine: hand back the factor's machine. */
+ rtnVal = factorWithAug->walk( pd );
+ break;
+ }
+ }
+ return rtnVal;
+}
+
+/* Build the name tree below a term node: the left term first (binary nodes
+ * only), then the factor with augmentation. */
+void Term::makeNameTree( ParseData *pd )
+{
+    const bool isBinary = type == ConcatType || type == RightStartType ||
+            type == RightFinishType || type == LeftType;
+
+    if ( isBinary )
+        term->makeNameTree( pd );
+    if ( isBinary || type == FactorWithAugType )
+        factorWithAug->makeNameTree( pd );
+}
+
+/* Resolve name references below a term node: the left term first (binary
+ * nodes only), then the factor with augmentation. */
+void Term::resolveNameRefs( ParseData *pd )
+{
+    const bool isBinary = type == ConcatType || type == RightStartType ||
+            type == RightFinishType || type == LeftType;
+
+    if ( isBinary )
+        term->resolveNameRefs( pd );
+    if ( isBinary || type == FactorWithAugType )
+        factorWithAug->resolveNameRefs( pd );
+}
+
+/* Release what a factor with augmentation node owns: the repetition node
+ * below it and the array of priority descriptors. */
+FactorWithAug::~FactorWithAug()
+{
+    /* The child repetition node goes first. */
+    delete factorWithRep;
+
+    /* Applying delete[] to a null pointer does nothing, so the descriptor
+     * array needs no explicit null test. */
+    delete[] priorDescs;
+}
+
+/* Embed every collected action into graph, dispatching on the augmentation
+ * type. actionOrd[i] is the ordering passed along with actions[i]. Each of
+ * the start-* embeddings is followed by afterOpMinimize. Global error
+ * actions pass 0 where local error actions pass their localErrKey. An
+ * augmentation type not listed here trips the assertion. */
+void FactorWithAug::assignActions( ParseData *pd, FsmAp *graph, int *actionOrd )
+{
+ /* Assign actions. */
+ for ( int i = 0; i < actions.length(); i++ ) {
+ switch ( actions[i].type ) {
+ /* Transition actions. */
+ case at_start:
+ graph->startFsmAction( actionOrd[i], actions[i].action );
+ afterOpMinimize( graph );
+ break;
+ case at_all:
+ graph->allTransAction( actionOrd[i], actions[i].action );
+ break;
+ case at_finish:
+ graph->finishFsmAction( actionOrd[i], actions[i].action );
+ break;
+ case at_leave:
+ graph->leaveFsmAction( actionOrd[i], actions[i].action );
+ break;
+
+ /* Global error actions. */
+ case at_start_gbl_error:
+ graph->startErrorAction( actionOrd[i], actions[i].action, 0 );
+ afterOpMinimize( graph );
+ break;
+ case at_all_gbl_error:
+ graph->allErrorAction( actionOrd[i], actions[i].action, 0 );
+ break;
+ case at_final_gbl_error:
+ graph->finalErrorAction( actionOrd[i], actions[i].action, 0 );
+ break;
+ case at_not_start_gbl_error:
+ graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 );
+ break;
+ case at_not_final_gbl_error:
+ graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 );
+ break;
+ case at_middle_gbl_error:
+ graph->middleErrorAction( actionOrd[i], actions[i].action, 0 );
+ break;
+
+ /* Local error actions. */
+ case at_start_local_error:
+ graph->startErrorAction( actionOrd[i], actions[i].action,
+ actions[i].localErrKey );
+ afterOpMinimize( graph );
+ break;
+ case at_all_local_error:
+ graph->allErrorAction( actionOrd[i], actions[i].action,
+ actions[i].localErrKey );
+ break;
+ case at_final_local_error:
+ graph->finalErrorAction( actionOrd[i], actions[i].action,
+ actions[i].localErrKey );
+ break;
+ case at_not_start_local_error:
+ graph->notStartErrorAction( actionOrd[i], actions[i].action,
+ actions[i].localErrKey );
+ break;
+ case at_not_final_local_error:
+ graph->notFinalErrorAction( actionOrd[i], actions[i].action,
+ actions[i].localErrKey );
+ break;
+ case at_middle_local_error:
+ graph->middleErrorAction( actionOrd[i], actions[i].action,
+ actions[i].localErrKey );
+ break;
+
+ /* EOF actions. */
+ case at_start_eof:
+ graph->startEOFAction( actionOrd[i], actions[i].action );
+ afterOpMinimize( graph );
+ break;
+ case at_all_eof:
+ graph->allEOFAction( actionOrd[i], actions[i].action );
+ break;
+ case at_final_eof:
+ graph->finalEOFAction( actionOrd[i], actions[i].action );
+ break;
+ case at_not_start_eof:
+ graph->notStartEOFAction( actionOrd[i], actions[i].action );
+ break;
+ case at_not_final_eof:
+ graph->notFinalEOFAction( actionOrd[i], actions[i].action );
+ break;
+ case at_middle_eof:
+ graph->middleEOFAction( actionOrd[i], actions[i].action );
+ break;
+
+ /* To State Actions. */
+ case at_start_to_state:
+ graph->startToStateAction( actionOrd[i], actions[i].action );
+ afterOpMinimize( graph );
+ break;
+ case at_all_to_state:
+ graph->allToStateAction( actionOrd[i], actions[i].action );
+ break;
+ case at_final_to_state:
+ graph->finalToStateAction( actionOrd[i], actions[i].action );
+ break;
+ case at_not_start_to_state:
+ graph->notStartToStateAction( actionOrd[i], actions[i].action );
+ break;
+ case at_not_final_to_state:
+ graph->notFinalToStateAction( actionOrd[i], actions[i].action );
+ break;
+ case at_middle_to_state:
+ graph->middleToStateAction( actionOrd[i], actions[i].action );
+ break;
+
+ /* From State Actions. */
+ case at_start_from_state:
+ graph->startFromStateAction( actionOrd[i], actions[i].action );
+ afterOpMinimize( graph );
+ break;
+ case at_all_from_state:
+ graph->allFromStateAction( actionOrd[i], actions[i].action );
+ break;
+ case at_final_from_state:
+ graph->finalFromStateAction( actionOrd[i], actions[i].action );
+ break;
+ case at_not_start_from_state:
+ graph->notStartFromStateAction( actionOrd[i], actions[i].action );
+ break;
+ case at_not_final_from_state:
+ graph->notFinalFromStateAction( actionOrd[i], actions[i].action );
+ break;
+ case at_middle_from_state:
+ graph->middleFromStateAction( actionOrd[i], actions[i].action );
+ break;
+
+ /* Remaining cases, prevented by the parser. */
+ default:
+ assert( false );
+ break;
+ }
+ }
+}
+
+/* Embed every priority augmentation of this node into the graph. Entry i of
+ * priorOrd is the ordering used for priorityAugs[i] and priorDescs[i] is the
+ * descriptor handed to the graph for it. */
+void FactorWithAug::assignPriorities( FsmAp *graph, int *priorOrd )
+{
+    for ( int i = 0; i < priorityAugs.length(); i++ ) {
+        if ( priorityAugs[i].type == at_start ) {
+            /* Start fsm priorities are a special case that may require
+             * minimization afterwards. */
+            graph->startFsmPrior( priorOrd[i], &priorDescs[i] );
+            afterOpMinimize( graph );
+        }
+        else if ( priorityAugs[i].type == at_all )
+            graph->allTransPrior( priorOrd[i], &priorDescs[i] );
+        else if ( priorityAugs[i].type == at_finish )
+            graph->finishFsmPrior( priorOrd[i], &priorDescs[i] );
+        else if ( priorityAugs[i].type == at_leave )
+            graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] );
+        /* Any other augmentation type is prevented by the parser and is
+         * ignored here. */
+    }
+}
+
+/* Embed each condition attached to this node into the graph at the place
+ * named by its augmentation type. */
+void FactorWithAug::assignConditions( FsmAp *graph )
+{
+    for ( int i = 0; i < conditions.length(); i++ ) {
+        if ( conditions[i].type == at_start ) {
+            /* Embedding at the start is followed by a minimization pass. */
+            graph->startFsmCondition( conditions[i].action );
+            afterOpMinimize( graph );
+        }
+        else if ( conditions[i].type == at_all )
+            graph->allTransCondition( conditions[i].action );
+        else if ( conditions[i].type == at_leave )
+            graph->leaveFsmCondition( conditions[i].action );
+        /* Every other augmentation type is ignored. */
+    }
+}
+
+
+/* Evaluate a factor with augmentation node. Walks the wrapped factorWithRep
+ * to obtain its machine, then embeds this node's conditions, actions,
+ * priorities, epsilon transitions and label entry points into that machine,
+ * which is returned to the caller. */
+FsmAp *FactorWithAug::walk( ParseData *pd )
+{
+ /* Enter into the scopes created for the labels. */
+ NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
+
+ /* Make the array of function orderings. Left null when there are no
+ * actions; the loops below then never index it. */
+ int *actionOrd = 0;
+ if ( actions.length() > 0 )
+ actionOrd = new int[actions.length()];
+
+ /* First walk the list of actions, assigning order to all starting
+ * actions. This happens before the sub-expression is walked, so these
+ * orderings are taken from pd->curActionOrd ahead of anything the walk
+ * below consumes. */
+ for ( int i = 0; i < actions.length(); i++ ) {
+ if ( actions[i].type == at_start ||
+ actions[i].type == at_start_gbl_error ||
+ actions[i].type == at_start_local_error ||
+ actions[i].type == at_start_to_state ||
+ actions[i].type == at_start_from_state ||
+ actions[i].type == at_start_eof )
+ actionOrd[i] = pd->curActionOrd++;
+ }
+
+ /* Evaluate the factor with repetition. */
+ FsmAp *rtnVal = factorWithRep->walk( pd );
+
+ /* Compute the remaining action orderings. The condition is the exact
+ * complement of the one used above, so every entry of actionOrd is set
+ * exactly once. */
+ for ( int i = 0; i < actions.length(); i++ ) {
+ if ( actions[i].type != at_start &&
+ actions[i].type != at_start_gbl_error &&
+ actions[i].type != at_start_local_error &&
+ actions[i].type != at_start_to_state &&
+ actions[i].type != at_start_from_state &&
+ actions[i].type != at_start_eof )
+ actionOrd[i] = pd->curActionOrd++;
+ }
+
+ /* Embed conditions. */
+ assignConditions( rtnVal );
+
+ /* Embed actions. */
+ assignActions( pd, rtnVal , actionOrd );
+
+ /* Make the array of priority orderings. Orderings are local to this walk
+ * of the factor with augmentation. */
+ int *priorOrd = 0;
+ if ( priorityAugs.length() > 0 )
+ priorOrd = new int[priorityAugs.length()];
+
+ /* Walk all priorities, assigning the priority ordering. */
+ for ( int i = 0; i < priorityAugs.length(); i++ )
+ priorOrd[i] = pd->curPriorOrd++;
+
+ /* If the priority descriptors have not been made, make them now. Make
+ * priority descriptors for each priority assignment that will be passed to
+ * the fsm. Used to keep track of the key, value and used bit. The array
+ * is a member: it is built on the first walk only and is not released
+ * in this function. */
+ if ( priorDescs == 0 && priorityAugs.length() > 0 ) {
+ priorDescs = new PriorDesc[priorityAugs.length()];
+ for ( int i = 0; i < priorityAugs.length(); i++ ) {
+ /* Init the prior descriptor for the priority setting. */
+ priorDescs[i].key = priorityAugs[i].priorKey;
+ priorDescs[i].priority = priorityAugs[i].priorValue;
+ }
+ }
+
+ /* Assign priorities into the machine. */
+ assignPriorities( rtnVal, priorOrd );
+
+ /* Assign epsilon transitions. One entry of pd->epsilonResolvedLinks is
+ * consumed per link, in order. */
+ for ( int e = 0; e < epsilonLinks.length(); e++ ) {
+ /* Get the name, which may not exist. If it doesn't then silently
+ * ignore it because an error has already been reported. */
+ NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++];
+ if ( epTarg != 0 ) {
+ /* Make the epsilon transitions. */
+ rtnVal->epsilonTrans( epTarg->id );
+
+ /* Note that we have made a link to the name. */
+ pd->localNameScope->referencedNames.append( epTarg );
+ }
+ }
+
+ /* Set entry points for labels. */
+ if ( labels.length() > 0 ) {
+ /* Pop the names. */
+ pd->resetNameScope( nameFrame );
+
+ /* Make labels that are referenced into entry points. */
+ for ( int i = 0; i < labels.length(); i++ ) {
+ pd->enterNameScope( false, 1 );
+
+ /* Will always be found. */
+ NameInst *name = pd->curNameInst;
+
+ /* If the name is referenced then set the entry point. */
+ if ( name->numRefs > 0 )
+ rtnVal->setEntry( name->id, rtnVal->startState );
+ }
+
+ pd->popNameScope( nameFrame );
+ }
+
+ /* Release the temporary ordering arrays made for this walk. */
+ if ( priorOrd != 0 )
+ delete[] priorOrd;
+ if ( actionOrd != 0 )
+ delete[] actionOrd;
+ return rtnVal;
+}
+
+/* Add this node's labels to the tree of instantiated names and recurse into
+ * the wrapped factor. The current name instance of pd is the same on return
+ * as it was on entry. */
+void FactorWithAug::makeNameTree( ParseData *pd )
+{
+    /* Remember the current position so it can be restored afterwards. */
+    NameInst *savedNameInst = pd->curNameInst;
+
+    /* Each label makes a new scope nested inside the previous one. */
+    for ( int lbl = 0; lbl < labels.length(); lbl++ ) {
+        pd->curNameInst = pd->addNameInst( labels[lbl].loc,
+                labels[lbl].data, true );
+    }
+
+    /* Recurse, then pop the names. */
+    factorWithRep->makeNameTree( pd );
+    pd->curNameInst = savedNameInst;
+}
+
+
+/* Resolve the name references made by this node: record the scope each
+ * action is referenced from, recurse into the wrapped factor, then resolve
+ * every epsilon link and append the result (possibly null) to
+ * pd->epsilonResolvedLinks. */
+void FactorWithAug::resolveNameRefs( ParseData *pd )
+{
+ /* Enter into the name scope created by any labels. */
+ NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
+
+ /* Note action references. */
+ for ( int i = 0; i < actions.length(); i++ )
+ actions[i].action->actionRefs.append( pd->localNameScope );
+
+ /* Recurse first. IMPORTANT: we must do the exact same traversal as when
+ * the tree is constructed. */
+ factorWithRep->resolveNameRefs( pd );
+
+ /* Resolve epsilon transitions. */
+ for ( int ep = 0; ep < epsilonLinks.length(); ep++ ) {
+ /* Get the link. */
+ EpsilonLink &link = epsilonLinks[ep];
+ NameInst *resolvedName = 0;
+
+ if ( link.target.length() == 1 && strcmp( link.target.data[0], "final" ) == 0 ) {
+ /* Epsilon drawn to an implicit final state. An implicit final is
+ * only available in join operations. */
+ resolvedName = pd->localNameScope->final;
+ }
+ else {
+ /* Do a search for the name. */
+ NameSet resolved;
+ pd->resolveFrom( resolved, pd->localNameScope, link.target, 0 );
+ if ( resolved.length() > 0 ) {
+ /* Take the first one. */
+ resolvedName = resolved[0];
+ if ( resolved.length() > 1 ) {
+ /* Complain about the multiple references. */
+ error(link.loc) << "state reference " << link.target <<
+ " resolves to multiple entry points" << endl;
+ errorStateLabels( resolved );
+ }
+ }
+ }
+
+ /* This is tricky, we stuff resolved epsilon transitions into one long
+ * vector in the parse data structure. Since the name resolution and
+ * graph generation both do identical walks of the parse tree we
+ * should always find the link resolutions in the right place. A null
+ * entry is appended when resolution failed so the positions stay
+ * aligned. */
+ pd->epsilonResolvedLinks.append( resolvedName );
+
+ if ( resolvedName != 0 ) {
+ /* Found the name, bump up the reference count on it. */
+ resolvedName->numRefs += 1;
+ }
+ else {
+ /* Complain, no recovery action, the epsilon op will ignore any
+ * epsilon transitions whose names did not resolve. */
+ error(link.loc) << "could not resolve label " << link.target << endl;
+ }
+ }
+
+ /* Leave the label scope again, if one was entered for labels. */
+ if ( labels.length() > 0 )
+ pd->popNameScope( nameFrame );
+}
+
+
+/* Destroy a factor with repetition node. Which child pointer is deleted
+ * depends on the node type. */
+FactorWithRep::~FactorWithRep()
+{
+    switch ( type ) {
+        /* Every repetition form deletes the nested FactorWithRep. */
+        case StarType:
+        case StarStarType:
+        case OptionalType:
+        case PlusType:
+        case ExactType:
+        case MaxType:
+        case MinType:
+        case RangeType:
+            delete factorWithRep;
+            break;
+
+        /* The plain form deletes the FactorWithNeg instead. */
+        case FactorWithNegType:
+            delete factorWithNeg;
+            break;
+    }
+}
+
+/* Evaluate a factor with repetition node.
+ *
+ * Builds the machine for the operand and applies the repetition operator
+ * selected by 'type' (star, star-star, optional, plus, exact, max, min or
+ * range). A FactorWithNegType node just passes the FactorWithNeg machine up.
+ * Repetition counts of zero and invalid ranges yield the lambda machine
+ * after a warning or error. Returns the resulting machine. */
+FsmAp *FactorWithRep::walk( ParseData *pd )
+{
+    FsmAp *retFsm = 0;
+
+    switch ( type ) {
+    case StarType: {
+        /* Evaluate the FactorWithRep. */
+        retFsm = factorWithRep->walk( pd );
+        if ( retFsm->startState->isFinState() ) {
+            warning(loc) << "applying kleene star to a machine that "
+                    "accepts zero length word" << endl;
+        }
+
+        /* Shift over the start action orders then do the kleene star. */
+        pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+        retFsm->starOp( );
+        afterOpMinimize( retFsm );
+        break;
+    }
+    case StarStarType: {
+        /* Evaluate the FactorWithRep. */
+        retFsm = factorWithRep->walk( pd );
+        if ( retFsm->startState->isFinState() ) {
+            warning(loc) << "applying kleene star to a machine that "
+                    "accepts zero length word" << endl;
+        }
+
+        /* Set up the prior descs. All gets priority one, whereas leaving gets
+         * priority zero. Make a unique key so that these priorities don't
+         * interfere with any priorities set by the user. */
+        priorDescs[0].key = pd->nextPriorKey++;
+        priorDescs[0].priority = 1;
+        retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
+
+        /* Leaving gets priority 0. Use same unique key. */
+        priorDescs[1].key = priorDescs[0].key;
+        priorDescs[1].priority = 0;
+        retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
+
+        /* Shift over the start action orders then do the kleene star. */
+        pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+        retFsm->starOp( );
+        afterOpMinimize( retFsm );
+        break;
+    }
+    case OptionalType: {
+        /* Make the null fsm. */
+        FsmAp *nu = new FsmAp();
+        nu->lambdaFsm( );
+
+        /* Evaluate the FactorWithRep. */
+        retFsm = factorWithRep->walk( pd );
+
+        /* Perform the question operator. */
+        retFsm->unionOp( nu );
+        afterOpMinimize( retFsm );
+        break;
+    }
+    case PlusType: {
+        /* Evaluate the FactorWithRep. */
+        retFsm = factorWithRep->walk( pd );
+        if ( retFsm->startState->isFinState() ) {
+            /* Message spelling fixed ("accpets") to match the other
+             * repetition warnings. */
+            warning(loc) << "applying plus operator to a machine that "
+                    "accepts zero length word" << endl;
+        }
+
+        /* Need a duplicate for the star end. */
+        FsmAp *dup = new FsmAp( *retFsm );
+
+        /* The start func orders need to be shifted before doing the star. */
+        pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd );
+
+        /* Star the duplicate. */
+        dup->starOp( );
+        afterOpMinimize( dup );
+
+        retFsm->concatOp( dup );
+        afterOpMinimize( retFsm );
+        break;
+    }
+    case ExactType: {
+        /* Get an int from the repetition amount. */
+        if ( lowerRep == 0 ) {
+            /* No copies. Don't need to evaluate the factorWithRep.
+             * This defeats the purpose so give a warning. */
+            warning(loc) << "exactly zero repetitions results "
+                    "in the null machine" << endl;
+
+            retFsm = new FsmAp();
+            retFsm->lambdaFsm();
+        }
+        else {
+            /* Evaluate the first FactorWithRep. */
+            retFsm = factorWithRep->walk( pd );
+            if ( retFsm->startState->isFinState() ) {
+                warning(loc) << "applying repetition to a machine that "
+                        "accepts zero length word" << endl;
+            }
+
+            /* The start func orders need to be shifted before doing the
+             * repetition. */
+            pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+
+            /* Do the repetition on the machine. Already guarded against n == 0 */
+            retFsm->repeatOp( lowerRep );
+            afterOpMinimize( retFsm );
+        }
+        break;
+    }
+    case MaxType: {
+        /* Get an int from the repetition amount. */
+        if ( upperRep == 0 ) {
+            /* No copies. Don't need to evaluate the factorWithRep.
+             * This defeats the purpose so give a warning. */
+            warning(loc) << "max zero repetitions results "
+                    "in the null machine" << endl;
+
+            retFsm = new FsmAp();
+            retFsm->lambdaFsm();
+        }
+        else {
+            /* Evaluate the first FactorWithRep. */
+            retFsm = factorWithRep->walk( pd );
+            if ( retFsm->startState->isFinState() ) {
+                warning(loc) << "applying max repetition to a machine that "
+                        "accepts zero length word" << endl;
+            }
+
+            /* The start func orders need to be shifted before doing the
+             * repetition. */
+            pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+
+            /* Do the repetition on the machine. Already guarded against n == 0 */
+            retFsm->optionalRepeatOp( upperRep );
+            afterOpMinimize( retFsm );
+        }
+        break;
+    }
+    case MinType: {
+        /* Evaluate the repeated machine. */
+        retFsm = factorWithRep->walk( pd );
+        if ( retFsm->startState->isFinState() ) {
+            warning(loc) << "applying min repetition to a machine that "
+                    "accepts zero length word" << endl;
+        }
+
+        /* The start func orders need to be shifted before doing the repetition
+         * and the kleene star. */
+        pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+
+        if ( lowerRep == 0 ) {
+            /* Acts just like a star op on the machine to return. */
+            retFsm->starOp( );
+            afterOpMinimize( retFsm );
+        }
+        else {
+            /* Take a duplicate for the plus. */
+            FsmAp *dup = new FsmAp( *retFsm );
+
+            /* Do repetition on the first half. */
+            retFsm->repeatOp( lowerRep );
+            afterOpMinimize( retFsm );
+
+            /* Star the duplicate. */
+            dup->starOp( );
+            afterOpMinimize( dup );
+
+            /* Tack on the kleene star. */
+            retFsm->concatOp( dup );
+            afterOpMinimize( retFsm );
+        }
+        break;
+    }
+    case RangeType: {
+        /* Check for bogus range. */
+        if ( upperRep - lowerRep < 0 ) {
+            error(loc) << "invalid range repetition" << endl;
+
+            /* Return null machine as recovery. */
+            retFsm = new FsmAp();
+            retFsm->lambdaFsm();
+        }
+        else if ( lowerRep == 0 && upperRep == 0 ) {
+            /* No copies. Don't need to evaluate the factorWithRep. This
+             * defeats the purpose so give a warning. */
+            warning(loc) << "zero to zero repetitions results "
+                    "in the null machine" << endl;
+
+            retFsm = new FsmAp();
+            retFsm->lambdaFsm();
+        }
+        else {
+            /* Now need to evaluate the repeated machine. */
+            retFsm = factorWithRep->walk( pd );
+            if ( retFsm->startState->isFinState() ) {
+                warning(loc) << "applying range repetition to a machine that "
+                        "accepts zero length word" << endl;
+            }
+
+            /* The start func orders need to be shifted before doing both kinds
+             * of repetition. */
+            pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+
+            if ( lowerRep == 0 ) {
+                /* Just doing max repetition. Already guarded against n == 0. */
+                retFsm->optionalRepeatOp( upperRep );
+                afterOpMinimize( retFsm );
+            }
+            else if ( lowerRep == upperRep ) {
+                /* Just doing exact repetition. Already guarded against n == 0. */
+                retFsm->repeatOp( lowerRep );
+                afterOpMinimize( retFsm );
+            }
+            else {
+                /* This is the case that 0 < lowerRep < upperRep. Take a
+                 * duplicate for the optional repeat. */
+                FsmAp *dup = new FsmAp( *retFsm );
+
+                /* Do repetition on the first half. */
+                retFsm->repeatOp( lowerRep );
+                afterOpMinimize( retFsm );
+
+                /* Do optional repetition on the second half. */
+                dup->optionalRepeatOp( upperRep - lowerRep );
+                afterOpMinimize( dup );
+
+                /* Tack on the duplicate machine. */
+                retFsm->concatOp( dup );
+                afterOpMinimize( retFsm );
+            }
+        }
+        break;
+    }
+    case FactorWithNegType: {
+        /* Evaluate the Factor. Pass it up. */
+        retFsm = factorWithNeg->walk( pd );
+        break;
+    }
+    }
+    return retFsm;
+}
+
+/* Build the name tree below this node by forwarding to whichever child the
+ * node type selects. */
+void FactorWithRep::makeNameTree( ParseData *pd )
+{
+    switch ( type ) {
+        /* All repetition forms hold a nested FactorWithRep. */
+        case StarType: case StarStarType: case OptionalType: case PlusType:
+        case ExactType: case MaxType: case MinType: case RangeType:
+            factorWithRep->makeNameTree( pd );
+            break;
+
+        /* No repetition: the child is a FactorWithNeg. */
+        case FactorWithNegType:
+            factorWithNeg->makeNameTree( pd );
+            break;
+    }
+}
+
+/* Resolve name references below this node by forwarding to whichever child
+ * the node type selects. */
+void FactorWithRep::resolveNameRefs( ParseData *pd )
+{
+    switch ( type ) {
+        /* All repetition forms hold a nested FactorWithRep. */
+        case StarType: case StarStarType: case OptionalType: case PlusType:
+        case ExactType: case MaxType: case MinType: case RangeType:
+            factorWithRep->resolveNameRefs( pd );
+            break;
+
+        /* No repetition: the child is a FactorWithNeg. */
+        case FactorWithNegType:
+            factorWithNeg->resolveNameRefs( pd );
+            break;
+    }
+}
+
+/* Destroy a factor with negation node, deleting the child that matches the
+ * node type. */
+FactorWithNeg::~FactorWithNeg()
+{
+    if ( type == NegateType || type == CharNegateType ) {
+        /* Both negation forms hold a nested FactorWithNeg. */
+        delete factorWithNeg;
+    }
+    else if ( type == FactorType ) {
+        delete factor;
+    }
+}
+
+/* Evaluate a factor with negation node. A plain factor is passed straight
+ * up. Negation subtracts the operand from dot-star and character negation
+ * subtracts it from dot. Returns the resulting machine. */
+FsmAp *FactorWithNeg::walk( ParseData *pd )
+{
+    if ( type == FactorType ) {
+        /* Evaluate the Factor. Pass it up. */
+        return factor->walk( pd );
+    }
+
+    if ( type == NegateType || type == CharNegateType ) {
+        /* Evaluate the operand first, then build the machine it is
+         * subtracted from. */
+        FsmAp *operand = factorWithNeg->walk( pd );
+        FsmAp *negated = ( type == NegateType ) ? dotStarFsm( pd ) : dotFsm( pd );
+
+        negated->subtractOp( operand );
+        afterOpMinimize( negated );
+        return negated;
+    }
+
+    return 0;
+}
+
+/* Build the name tree below this node by forwarding to the child selected
+ * by the node type. */
+void FactorWithNeg::makeNameTree( ParseData *pd )
+{
+    if ( type == NegateType || type == CharNegateType )
+        factorWithNeg->makeNameTree( pd );
+    else if ( type == FactorType )
+        factor->makeNameTree( pd );
+}
+
+/* Resolve name references below this node by forwarding to the child
+ * selected by the node type. */
+void FactorWithNeg::resolveNameRefs( ParseData *pd )
+{
+    if ( type == NegateType || type == CharNegateType )
+        factorWithNeg->resolveNameRefs( pd );
+    else if ( type == FactorType )
+        factor->resolveNameRefs( pd );
+}
+
+/* Destroy a factor node, deleting the child that belongs to the node type.
+ * A reference node deletes nothing. */
+Factor::~Factor()
+{
+    switch ( type ) {
+        case LiteralType:      delete literal;      break;
+        case RangeType:        delete range;        break;
+        case OrExprType:       delete reItem;       break;
+        case RegExprType:      delete regExpr;      break;
+        case ParenType:        delete join;         break;
+        case LongestMatchType: delete longestMatch; break;
+
+        /* The referenced definition is not deleted here. */
+        case ReferenceType:
+            break;
+    }
+}
+
+/* Evaluate a factor node by walking the child that belongs to the node type
+ * and returning the machine it produces. Returns null for a type that
+ * matches none of the cases. */
+FsmAp *Factor::walk( ParseData *pd )
+{
+    switch ( type ) {
+        case LiteralType:
+            return literal->walk( pd );
+        case RangeType:
+            return range->walk( pd );
+        case OrExprType:
+            /* A null root regex is passed to the or-expression walk. */
+            return reItem->walk( pd, 0 );
+        case RegExprType:
+            /* Null root: the regular expression treats itself as root. */
+            return regExpr->walk( pd, 0 );
+        case ReferenceType:
+            return varDef->walk( pd );
+        case ParenType:
+            return join->walk( pd );
+        case LongestMatchType:
+            return longestMatch->walk( pd );
+    }
+
+    return 0;
+}
+
+/* Build the name tree below this factor. Only references, parenthesized
+ * joins and longest-match nodes recurse; the other types do nothing. */
+void Factor::makeNameTree( ParseData *pd )
+{
+    switch ( type ) {
+        case ReferenceType:
+            varDef->makeNameTree( loc, pd );
+            break;
+        case ParenType:
+            join->makeNameTree( pd );
+            break;
+        case LongestMatchType:
+            longestMatch->makeNameTree( pd );
+            break;
+        default:
+            /* Literal, range, or-expression and regular expression. */
+            break;
+    }
+}
+
+/* Resolve name references below this factor. Only references, parenthesized
+ * joins and longest-match nodes recurse; the other types do nothing. */
+void Factor::resolveNameRefs( ParseData *pd )
+{
+    switch ( type ) {
+        case ReferenceType:
+            varDef->resolveNameRefs( pd );
+            break;
+        case ParenType:
+            join->resolveNameRefs( pd );
+            break;
+        case LongestMatchType:
+            longestMatch->resolveNameRefs( pd );
+            break;
+        default:
+            /* Literal, range, or-expression and regular expression. */
+            break;
+    }
+}
+
+/* Destroy a range object together with the literals for its lower and upper
+ * ends. */
+Range::~Range()
+{
+    /* Lower end first, then the upper end. */
+    delete lowerLit;
+    delete upperLit;
+}
+
+/* Evaluate a range. Gets the lower and upper key and makes an fsm range.
+ *
+ * Both ends are walked as literals and must be single character machines;
+ * an error is reported for an end that is not. If the lower key is greater
+ * than the upper key an error is reported and the upper key is set to the
+ * lower one. Returns a newly allocated range machine. */
+FsmAp *Range::walk( ParseData *pd )
+{
+    /* Construct and verify the suitability of the lower end of the range. */
+    FsmAp *lowerFsm = lowerLit->walk( pd );
+    if ( !lowerFsm->checkSingleCharMachine() ) {
+        error(lowerLit->token.loc) <<
+                "bad range lower end, must be a single character" << endl;
+    }
+
+    /* Construct and verify the upper end. */
+    FsmAp *upperFsm = upperLit->walk( pd );
+    if ( !upperFsm->checkSingleCharMachine() ) {
+        error(upperLit->token.loc) <<
+                "bad range upper end, must be a single character" << endl;
+    }
+
+    /* Grab the keys from the machines, then delete them. Execution continues
+     * after a failed check above, and such a machine may have no transition
+     * out of its start state at all (presumably the case for an empty
+     * literal -- confirm), so do not dereference a null head. Key zero is
+     * used as the recovery value; the error has already been reported. */
+    Key lowKey = lowerFsm->startState->outList.head != 0 ?
+            lowerFsm->startState->outList.head->lowKey : Key( 0 );
+    Key highKey = upperFsm->startState->outList.head != 0 ?
+            upperFsm->startState->outList.head->lowKey : Key( 0 );
+    delete lowerFsm;
+    delete upperFsm;
+
+    /* Validate the range. */
+    if ( lowKey > highKey ) {
+        /* Recover by setting upper to lower; */
+        error(lowerLit->token.loc) << "lower end of range is greater than upper end" << endl;
+        highKey = lowKey;
+    }
+
+    /* Return the range now that it is validated. */
+    FsmAp *retFsm = new FsmAp();
+    retFsm->rangeFsm( lowKey, highKey );
+    return retFsm;
+}
+
+/* Evaluate a literal object. A number becomes a machine for a single key, a
+ * literal string becomes a concatenation of keys (case insensitive when the
+ * prepared string says so). Returns null for any other type. */
+FsmAp *Literal::walk( ParseData *pd )
+{
+    if ( type == Number ) {
+        /* Make the fsm key in int format, then the machine for it. */
+        Key numKey = makeFsmKeyNum( token.data, token.loc, pd );
+        FsmAp *numFsm = new FsmAp();
+        numFsm->concatFsm( numKey );
+        return numFsm;
+    }
+
+    if ( type == LitString ) {
+        /* Prepare the string and turn it into an array of keys. */
+        Token prepared;
+        bool ignoreCase;
+        token.prepareLitString( prepared, ignoreCase );
+        Key *keys = new Key[prepared.length];
+        makeFsmKeyArray( keys, prepared.data, prepared.length, pd );
+
+        /* Make the new machine. */
+        FsmAp *strFsm = new FsmAp();
+        if ( ignoreCase )
+            strFsm->concatFsmCI( keys, prepared.length );
+        else
+            strFsm->concatFsm( keys, prepared.length );
+
+        /* The prepared data and the key array are both released here. */
+        delete[] prepared.data;
+        delete[] keys;
+        return strFsm;
+    }
+
+    return 0;
+}
+
+/* Destroy a regular expression object. Only a recursive node has children
+ * to delete; an empty node has none. */
+RegExpr::~RegExpr()
+{
+    if ( type == RecurseItem ) {
+        delete regExpr;
+        delete item;
+    }
+}
+
+/* Evaluate a regular expression object. A null rootRegex marks this node as
+ * the root, in which case a pointer to this node is passed down instead.
+ * Returns the concatenation of the nested expression and its item, or the
+ * lambda machine for an empty expression. */
+FsmAp *RegExpr::walk( ParseData *pd, RegExpr *rootRegex )
+{
+    /* This is the root regex, pass down a pointer to this. */
+    if ( rootRegex == 0 )
+        rootRegex = this;
+
+    if ( type == RecurseItem ) {
+        /* Walk both items, left side first, then join them. */
+        FsmAp *prefix = regExpr->walk( pd, rootRegex );
+        FsmAp *suffix = item->walk( pd, rootRegex );
+        prefix->concatOp( suffix );
+        return prefix;
+    }
+
+    if ( type == Empty ) {
+        FsmAp *emptyFsm = new FsmAp();
+        emptyFsm->lambdaFsm();
+        return emptyFsm;
+    }
+
+    return 0;
+}
+
+/* Destroy an item of a regular expression. Only the or-block forms have a
+ * child to delete; data and dot items have none. */
+ReItem::~ReItem()
+{
+    if ( type == OrBlock || type == NegOrBlock )
+        delete orBlock;
+}
+
+/* Evaluate an item of a regular expression.
+ *
+ * Data becomes a concatenation of keys (case insensitive when the root regex
+ * says so), dot becomes the dot machine, an or block becomes the machine of
+ * the block, and a negated or block is the dot machine with the block
+ * subtracted. An empty or block walks to null: it is replaced by the lambda
+ * machine, and an empty negated block subtracts nothing from dot. If the
+ * item is starred the kleene star is applied last. Returns the machine. */
+FsmAp *ReItem::walk( ParseData *pd, RegExpr *rootRegex )
+{
+    FsmAp *rtnVal = 0;
+
+    switch ( type ) {
+        case Data: {
+            /* Move the data into an integer array and make a concat fsm. */
+            Key *arr = new Key[token.length];
+            makeFsmKeyArray( arr, token.data, token.length, pd );
+
+            /* Make the concat fsm. */
+            rtnVal = new FsmAp();
+            if ( rootRegex != 0 && rootRegex->caseInsensitive )
+                rtnVal->concatFsmCI( arr, token.length );
+            else
+                rtnVal->concatFsm( arr, token.length );
+            delete[] arr;
+            break;
+        }
+        case Dot: {
+            /* Make the dot fsm. */
+            rtnVal = dotFsm( pd );
+            break;
+        }
+        case OrBlock: {
+            /* Get the or block and minimize it. */
+            rtnVal = orBlock->walk( pd, rootRegex );
+            if ( rtnVal == 0 ) {
+                rtnVal = new FsmAp();
+                rtnVal->lambdaFsm();
+            }
+            rtnVal->minimizePartition2();
+            break;
+        }
+        case NegOrBlock: {
+            /* Get the or block and minimize it. As in the OrBlock case the
+             * walk returns null for an empty block, so guard against it
+             * instead of dereferencing a null machine. */
+            FsmAp *fsm = orBlock->walk( pd, rootRegex );
+            if ( fsm != 0 )
+                fsm->minimizePartition2();
+
+            /* Make a dot fsm and subtract from it. With an empty block
+             * there is nothing to subtract. */
+            rtnVal = dotFsm( pd );
+            if ( fsm != 0 )
+                rtnVal->subtractOp( fsm );
+            rtnVal->minimizePartition2();
+            break;
+        }
+    }
+
+    /* If the item is followed by a star, then apply the star op. */
+    if ( star ) {
+        if ( rtnVal->startState->isFinState() ) {
+            /* Message spelling fixed ("accpets"). */
+            warning(loc) << "applying kleene star to a machine that "
+                    "accepts zero length word" << endl;
+        }
+
+        rtnVal->starOp();
+        rtnVal->minimizePartition2();
+    }
+    return rtnVal;
+}
+
+/* Destroy an or block of a regular expression. Only a recursive node has
+ * children to delete; an empty node has none. */
+ReOrBlock::~ReOrBlock()
+{
+    if ( type == RecurseItem ) {
+        delete orBlock;
+        delete item;
+    }
+}
+
+
+/* Evaluate an or block of a regular expression. Returns the union of the
+ * nested block and this node's item, or null for an empty block. Callers
+ * must be prepared for the null result. */
+FsmAp *ReOrBlock::walk( ParseData *pd, RegExpr *rootRegex )
+{
+    if ( type == RecurseItem ) {
+        /* Evaluate the nested block first, then the item. */
+        FsmAp *blockFsm = orBlock->walk( pd, rootRegex );
+        FsmAp *itemFsm = item->walk( pd, rootRegex );
+
+        /* The nested block was empty, so the item stands alone. */
+        if ( blockFsm == 0 )
+            return itemFsm;
+
+        blockFsm->unionOp( itemFsm );
+        return blockFsm;
+    }
+
+    /* Empty block, or a type matching neither case. */
+    return 0;
+}
+
+/* Evaluate an or block item of a regular expression.
+ *
+ * A data item becomes an or machine over the unique keys of the token. A
+ * range item becomes a range machine; when the root regex is case
+ * insensitive, the part of the range that overlaps A-Z is mirrored into
+ * a-z and the part that overlaps a-z is mirrored into A-Z. Returns the
+ * machine, or null for a type that matches neither case. */
+FsmAp *ReOrItem::walk( ParseData *pd, RegExpr *rootRegex )
+{
+    FsmAp *rtnVal = 0;
+    switch ( type ) {
+    case Data: {
+        /* Make the or machine. */
+        rtnVal = new FsmAp();
+
+        /* Put the or data into an array of ints. Note that we find unique
+         * keys. Duplicates are silently ignored. The alternative would be to
+         * issue warning or an error but since we can't with [a0-9a] or 'a' |
+         * 'a' don't bother here. */
+        KeySet keySet;
+        makeFsmUniqueKeyArray( keySet, token.data, token.length,
+                rootRegex != 0 ? rootRegex->caseInsensitive : false, pd );
+
+        /* Run the or operator. */
+        rtnVal->orFsm( keySet.data, keySet.length() );
+        break;
+    }
+    case Range: {
+        /* Make the upper and lower keys. */
+        Key lowKey = makeFsmKeyChar( lower, pd );
+        Key highKey = makeFsmKeyChar( upper, pd );
+
+        /* Validate the range. */
+        if ( lowKey > highKey ) {
+            /* Recover by setting upper to lower; */
+            error(loc) << "lower end of range is greater than upper end" << endl;
+            highKey = lowKey;
+        }
+
+        /* Make the range machine. */
+        rtnVal = new FsmAp();
+        rtnVal->rangeFsm( lowKey, highKey );
+
+        if ( rootRegex != 0 && rootRegex->caseInsensitive ) {
+            /* The two overlap tests are independent: a range such as [X-c]
+             * overlaps both the upper and the lower case letters and needs
+             * both mirrors. They were previously chained with else-if, which
+             * dropped the upper case mirror of the lower case part. */
+            if ( lowKey <= 'Z' && 'A' <= highKey ) {
+                /* Clip to A-Z, then shift into a-z. */
+                Key otherLow = lowKey < 'A' ? Key('A') : lowKey;
+                Key otherHigh = 'Z' < highKey ? Key('Z') : highKey;
+
+                otherLow = 'a' + ( otherLow - 'A' );
+                otherHigh = 'a' + ( otherHigh - 'A' );
+
+                FsmAp *otherRange = new FsmAp();
+                otherRange->rangeFsm( otherLow, otherHigh );
+                rtnVal->unionOp( otherRange );
+                rtnVal->minimizePartition2();
+            }
+            if ( lowKey <= 'z' && 'a' <= highKey ) {
+                /* Clip to a-z, then shift into A-Z. */
+                Key otherLow = lowKey < 'a' ? Key('a') : lowKey;
+                Key otherHigh = 'z' < highKey ? Key('z') : highKey;
+
+                otherLow = 'A' + ( otherLow - 'a' );
+                otherHigh = 'A' + ( otherHigh - 'a' );
+
+                FsmAp *otherRange = new FsmAp();
+                otherRange->rangeFsm( otherLow, otherHigh );
+                rtnVal->unionOp( otherRange );
+                rtnVal->minimizePartition2();
+            }
+        }
+
+        break;
+    }
+    }
+    return rtnVal;
+}
diff --git a/contrib/tools/ragel5/ragel/parsetree.h b/contrib/tools/ragel5/ragel/parsetree.h
new file mode 100644
index 0000000000..4f398683a9
--- /dev/null
+++ b/contrib/tools/ragel5/ragel/parsetree.h
@@ -0,0 +1,755 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _PARSETREE_H
+#define _PARSETREE_H
+
+#include "ragel.h"
+#include "avlmap.h"
+#include "bstmap.h"
+#include "vector.h"
+#include "dlist.h"
+
+struct NameInst;
+
+/* Types of builtin machines. One enumerator per builtin; presumably each
+ * corresponds to the builtin machine of the same name (any, ascii, ...) --
+ * confirm against the code that consumes this enum. */
+enum BuiltinMachine
+{
+ BT_Any,
+ BT_Ascii,
+ BT_Extend,
+ BT_Alpha,
+ BT_Digit,
+ BT_Alnum,
+ BT_Lower,
+ BT_Upper,
+ BT_Cntrl,
+ BT_Graph,
+ BT_Print,
+ BT_Punct,
+ BT_Space,
+ BT_Xdigit,
+ BT_Lambda,
+ BT_Empty
+};
+
+
+struct ParseData;
+
+/* Leaf type. */
+struct Literal;
+
+/* Tree nodes. */
+
+struct Term;
+struct FactorWithAug;
+struct FactorWithRep;
+struct FactorWithNeg;
+struct Factor;
+struct Expression;
+struct Join;
+struct JoinOrLm;
+struct LongestMatch;
+struct LongestMatchPart;
+struct LmPartList;
+struct Range;
+
+/* Type of augmentation. Describes locations in the machine.
+ *
+ * The first group is used for transition actions, priorities and
+ * conditions. Each of the following groups lists its six members in the
+ * same order: start, all, final, not_start, not_final, middle. Do not
+ * reorder: StateAugType must follow the same order. */
+enum AugType
+{
+ /* Transition actions/priorities. */
+ at_start,
+ at_all,
+ at_finish,
+ at_leave,
+
+ /* Global error actions. */
+ at_start_gbl_error,
+ at_all_gbl_error,
+ at_final_gbl_error,
+ at_not_start_gbl_error,
+ at_not_final_gbl_error,
+ at_middle_gbl_error,
+
+ /* Local error actions. */
+ at_start_local_error,
+ at_all_local_error,
+ at_final_local_error,
+ at_not_start_local_error,
+ at_not_final_local_error,
+ at_middle_local_error,
+
+ /* To State Action embedding. */
+ at_start_to_state,
+ at_all_to_state,
+ at_final_to_state,
+ at_not_start_to_state,
+ at_not_final_to_state,
+ at_middle_to_state,
+
+ /* From State Action embedding. */
+ at_start_from_state,
+ at_all_from_state,
+ at_final_from_state,
+ at_not_start_from_state,
+ at_not_final_from_state,
+ at_middle_from_state,
+
+ /* EOF Action embedding. */
+ at_start_eof,
+ at_all_eof,
+ at_final_eof,
+ at_not_start_eof,
+ at_not_final_eof,
+ at_middle_eof
+};
+
+/* IMPORTANT: These must follow the same order as the state augs in AugType
+ * since we will be using this to compose AugType. The values run from zero
+ * (sat_start) to five (sat_middle). */
+enum StateAugType
+{
+ sat_start = 0,
+ sat_all,
+ sat_final,
+ sat_not_start,
+ sat_not_final,
+ sat_middle
+};
+
+struct Action;
+struct PriorDesc;
+struct RegExpr;
+struct ReItem;
+struct ReOrBlock;
+struct ReOrItem;
+struct ExplicitMachine;
+struct InlineItem;
+struct InlineList;
+
+/* Reference to a named state. */
+typedef Vector<char*> NameRef;
+typedef Vector<NameRef*> NameRefList;
+typedef Vector<NameInst*> NameTargList;
+
+/* Structure for storing location of epsilon transitions. */
+struct EpsilonLink
+{
+ /* Both members are initialised by copy from the arguments. */
+ EpsilonLink( const InputLoc &loc, NameRef &target )
+ : loc(loc), target(target) { }
+
+ /* Input location of the link. */
+ InputLoc loc;
+ /* Name reference the epsilon transition is drawn to. */
+ NameRef target;
+};
+
+/* A label: an input location together with the label text. */
+struct Label
+{
+ /* Stores the location by copy and the data pointer as given; the string
+ * itself is not copied. */
+ Label( const InputLoc &loc, char *data )
+ : loc(loc), data(data) { }
+
+ InputLoc loc;
+ char *data;
+};
+
+/* Structure represents an action assigned to some FactorWithAug node. The
+ * factor with aug will keep an array of these. */
+struct ParserAction
+{
+ ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action )
+ : loc(loc), type(type), localErrKey(localErrKey), action(action) { }
+
+ /* Input location of the embedding. */
+ InputLoc loc;
+ /* Where in the machine the action is embedded. */
+ AugType type;
+ /* Key of a local error action. NOTE(review): presumably meaningful only
+ * for the local error types -- confirm. */
+ int localErrKey;
+ /* The embedded action. The pointer is stored as given. */
+ Action *action;
+};
+
+/* A token: character data with an explicit length and an input location.
+ * There is no constructor, so the members start out uninitialised unless
+ * the object is value-initialised. */
+struct Token
+{
+ char *data;
+ int length;
+ InputLoc loc;
+
+ /* Fills in result and caseInsensitive from this token. NOTE(review):
+ * callers in parsetree.cpp delete[] result.data afterwards, so result
+ * presumably owns freshly allocated data -- confirm. */
+ void prepareLitString( Token &result, bool &caseInsensitive );
+ void append( const Token &other );
+ void set(const char *str, int len );
+};
+
+/* Store the value and type of a priority augmentation. */
+struct PriorityAug
+{
+ PriorityAug( AugType type, int priorKey, int priorValue ) :
+ type(type), priorKey(priorKey), priorValue(priorValue) { }
+
+ /* Where in the machine the priority is embedded. */
+ AugType type;
+ /* Key of the priority. */
+ int priorKey;
+ /* Value of the priority. */
+ int priorValue;
+};
+
+/*
+ * A Variable Definition
+ *
+ * Pairs a name with the JoinOrLm that defines it. Both pointers are stored
+ * as given and isExport starts out false.
+ */
+struct VarDef
+{
+ VarDef(const char *name, JoinOrLm *joinOrLm )
+ : name(name), joinOrLm(joinOrLm), isExport(false) { }
+
+ /* Parse tree traversal. */
+ FsmAp *walk( ParseData *pd );
+ void makeNameTree( const InputLoc &loc, ParseData *pd );
+ void resolveNameRefs( ParseData *pd );
+
+ const char *name;
+ JoinOrLm *joinOrLm;
+ bool isExport;
+};
+
+
+/*
+ * LongestMatch
+ *
+ * Wherever possible the item match will execute on the character. If not
+ * possible the item match will execute on a lookahead character and either
+ * hold the current char (if one away) or backup.
+ *
+ * How to handle the problem of backing up over a buffer break?
+ *
+ * Don't want to use pending out transitions for embedding item match because
+ * the role of item match action is different: it may sometimes match on the
+ * final transition, or may match on a lookahead character.
+ *
+ * Don't want to invent a new operator just for this. So just trail action
+ * after machine, this means we can only use literal actions.
+ *
+ * The item action may
+ *
+ * What states of the machine will be final. The item actions that wrap around
+ * on the last character will go straight to the start state.
+ *
+ * Some transitions will be lookahead transitions, they will hold the current
+ * character. Crossing them with regular transitions must be restricted
+ * because it does not make sense. The transition cannot simultaneously hold
+ * and consume the current character.
+ */
+/* One entry of a longest-match list: a join together with the action
+ * attached to it. Entries are linked through prev/next (see LmPartList). */
+struct LongestMatchPart
+{
+    /* Stores the arguments and nulls every pointer that is only assigned
+     * later, so a freshly built part never carries indeterminate pointers. */
+    LongestMatchPart( Join *join, Action *action,
+            InputLoc &semiLoc, int longestMatchId )
+    :
+        join(join), action(action), semiLoc(semiLoc),
+        setActId(0), actOnLast(0), actOnNext(0), actLagBehind(0),
+        longestMatchId(longestMatchId), inLmSelect(false),
+        longestMatch(0), prev(0), next(0) { }
+
+    InputLoc getLoc();
+
+    Join *join;
+    Action *action;
+    InputLoc semiLoc;
+
+    /* Not set by the constructor arguments; null until assigned elsewhere. */
+    Action *setActId;
+    Action *actOnLast;
+    Action *actOnNext;
+    Action *actLagBehind;
+    int longestMatchId;
+    bool inLmSelect;
+    LongestMatch *longestMatch;
+
+    /* List links. */
+    LongestMatchPart *prev, *next;
+};
+
+/* Declare a new type (instead of a typedef of the DList instantiation) so
+ * that ptreetypes.h need not include dlist.h. */
+struct LmPartList : DList<LongestMatchPart> {};
+
+/* A longest-match construct: the location it was written at plus the list
+ * of its parts. Instances carry their own next/prev list links. */
+struct LongestMatch
+{
+    /* Construct with a list of joins. The name, the select action and the
+     * list links start out null instead of indeterminate. */
+    LongestMatch( const InputLoc &loc, LmPartList *longestMatchList ) :
+        loc(loc), longestMatchList(longestMatchList), name(0),
+        lmActSelect(0), lmSwitchHandlesError(false),
+        next(0), prev(0) { }
+
+    /* Tree traversal. */
+    FsmAp *walk( ParseData *pd );
+    void makeNameTree( ParseData *pd );
+    void resolveNameRefs( ParseData *pd );
+    /* The spelling "Lonest" is kept as is: the definition and callers live
+     * outside this header. */
+    void runLonestMatch( ParseData *pd, FsmAp *graph );
+    Action *newAction( ParseData *pd, const InputLoc &loc, const char *name,
+            InlineList *inlineList );
+    void makeActions( ParseData *pd );
+    void findName( ParseData *pd );
+    void restart( FsmAp *graph, TransAp *trans );
+
+    InputLoc loc;
+    LmPartList *longestMatchList;
+    const char *name;
+
+    Action *lmActSelect;
+    bool lmSwitchHandlesError;
+
+    /* List links. */
+    LongestMatch *next, *prev;
+};
+
+
+/* List of Expressions. Expression nodes carry their own prev/next links. */
+typedef DList<Expression> ExprList;
+
+/* Holds either a Join or a LongestMatch; `type` says which pointer is the
+ * active one. */
+struct JoinOrLm
+{
+    enum Type {
+        JoinType,
+        LongestMatchType
+    };
+
+    /* Wrap a join. The unused longestMatch pointer is nulled so it is never
+     * left indeterminate. */
+    JoinOrLm( Join *join ) :
+        join(join), longestMatch(0), type(JoinType) {}
+
+    /* Wrap a longest match. The unused join pointer is nulled. */
+    JoinOrLm( LongestMatch *longestMatch ) :
+        join(0), longestMatch(longestMatch), type(LongestMatchType) {}
+
+    /* Tree traversal (defined out of line). */
+    FsmAp *walk( ParseData *pd );
+    void makeNameTree( ParseData *pd );
+    void resolveNameRefs( ParseData *pd );
+
+    Join *join;
+    LongestMatch *longestMatch;
+    Type type;
+};
+
+/*
+ * Join
+ *
+ * Holds an input location and a list of expressions. Both constructors and
+ * all traversal functions are defined out of line.
+ */
+struct Join
+{
+ /* Construct with the first expression. */
+ Join( Expression *expr );
+ /* Same, additionally taking an input location. */
+ Join( const InputLoc &loc, Expression *expr );
+
+ /* Tree traversal. */
+ FsmAp *walk( ParseData *pd );
+ FsmAp *walkJoin( ParseData *pd );
+ void makeNameTree( ParseData *pd );
+ void resolveNameRefs( ParseData *pd );
+
+ /* Data. */
+ InputLoc loc;
+ ExprList exprList;
+};
+
+/*
+ * Expression
+ *
+ * A node is one of: a binary operation between a sub-expression and a term
+ * (OrType, IntersectType, SubtractType, StrongSubtractType), a single term
+ * (TermType) or a builtin machine (BuiltinType). A new node's prev and next
+ * links point at the node itself.
+ */
+struct Expression
+{
+    enum Type {
+        OrType,
+        IntersectType,
+        SubtractType,
+        StrongSubtractType,
+        TermType,
+        BuiltinType
+    };
+
+    /* Construct with an expression on the left and a term on the right.
+     * There is no builtin argument here, so the member is value-initialised
+     * rather than initialised from itself. */
+    Expression( Expression *expression, Term *term, Type type ) :
+        expression(expression), term(term),
+        builtin(), type(type), prev(this), next(this) { }
+
+    /* Construct with only a term. As above, builtin is value-initialised. */
+    Expression( Term *term ) :
+        expression(0), term(term), builtin(),
+        type(TermType), prev(this), next(this) { }
+
+    /* Construct with a builtin type. */
+    Expression( BuiltinMachine builtin ) :
+        expression(0), term(0), builtin(builtin),
+        type(BuiltinType), prev(this), next(this) { }
+
+    ~Expression();
+
+    /* Tree traversal. */
+    FsmAp *walk( ParseData *pd, bool lastInSeq = true );
+    void makeNameTree( ParseData *pd );
+    void resolveNameRefs( ParseData *pd );
+
+    /* Node data. */
+    Expression *expression;
+    Term *term;
+    BuiltinMachine builtin;
+    Type type;
+
+    /* List links. */
+    Expression *prev, *next;
+};
+
+/*
+ * Term
+ *
+ * Either a bare FactorWithAug (FactorWithAugType) or a sub-term combined
+ * with a FactorWithAug by one of the remaining operator types.
+ */
+struct Term
+{
+    enum Type {
+        ConcatType,
+        RightStartType,
+        RightFinishType,
+        LeftType,
+        FactorWithAugType
+    };
+
+    /* Sub-term plus factor; the operator defaults to ConcatType. */
+    Term( Term *term, FactorWithAug *factorWithAug )
+    :
+        term(term),
+        factorWithAug(factorWithAug),
+        type(ConcatType)
+    { }
+
+    /* Sub-term plus factor with an explicit operator type. */
+    Term( Term *term, FactorWithAug *factorWithAug, Type type )
+    :
+        term(term),
+        factorWithAug(factorWithAug),
+        type(type)
+    { }
+
+    /* A factor on its own; there is no sub-term. */
+    Term( FactorWithAug *factorWithAug )
+    :
+        term(0),
+        factorWithAug(factorWithAug),
+        type(FactorWithAugType)
+    { }
+
+    ~Term();
+
+    /* Tree traversal (defined out of line). */
+    FsmAp *walk( ParseData *pd, bool lastInSeq = true );
+    void makeNameTree( ParseData *pd );
+    void resolveNameRefs( ParseData *pd );
+
+    Term *term;
+    FactorWithAug *factorWithAug;
+    Type type;
+
+    /* Priority descriptor for RightFinish type. */
+    PriorDesc priorDescs[2];
+};
+
+
+/* Third level of precedence: a FactorWithRep augmented with actions,
+ * priorities, labels, epsilon links and conditions. */
+struct FactorWithAug
+{
+    FactorWithAug( FactorWithRep *factorWithRep )
+    :
+        priorDescs(0),
+        factorWithRep(factorWithRep)
+    { }
+
+    ~FactorWithAug();
+
+    /* Tree traversal. */
+    FsmAp *walk( ParseData *pd );
+    void makeNameTree( ParseData *pd );
+    void resolveNameRefs( ParseData *pd );
+
+    /* Application of the collected augmentations to a graph. */
+    void assignActions( ParseData *pd, FsmAp *graph, int *actionOrd );
+    void assignPriorities( FsmAp *graph, int *priorOrd );
+    void assignConditions( FsmAp *graph );
+
+    /* Augmentations collected for the factor node. priorDescs starts out
+     * null. */
+    Vector<ParserAction> actions;
+    Vector<PriorityAug> priorityAugs;
+    PriorDesc *priorDescs;
+    Vector<Label> labels;
+    Vector<EpsilonLink> epsilonLinks;
+    Vector<ParserAction> conditions;
+
+    /* The node being augmented. */
+    FactorWithRep *factorWithRep;
+};
+
+/* Fourth level of precedence. Trailing unary operators. Provides Kleene
+ * star, optional and plus, along with the counted repetition types. */
+struct FactorWithRep
+{
+    enum Type {
+        StarType,
+        StarStarType,
+        OptionalType,
+        PlusType,
+        ExactType,
+        MaxType,
+        MinType,
+        RangeType,
+        FactorWithNegType
+    };
+
+    /* Repetition applied to another FactorWithRep. */
+    FactorWithRep( const InputLoc &loc, FactorWithRep *factorWithRep,
+            int lowerRep, int upperRep, Type type ) :
+        loc(loc), factorWithRep(factorWithRep),
+        factorWithNeg(0), lowerRep(lowerRep),
+        upperRep(upperRep), type(type) { }
+
+    /* Plain wrapper around a FactorWithNeg. The location, the nested
+     * pointer and the repetition bounds are unused in this form; they are
+     * zeroed so they are never left indeterminate. */
+    FactorWithRep( FactorWithNeg *factorWithNeg )
+        : loc(), factorWithRep(0), factorWithNeg(factorWithNeg),
+        lowerRep(0), upperRep(0), type(FactorWithNegType) { }
+
+    ~FactorWithRep();
+
+    /* Tree traversal. */
+    FsmAp *walk( ParseData *pd );
+    void makeNameTree( ParseData *pd );
+    void resolveNameRefs( ParseData *pd );
+
+    InputLoc loc;
+    FactorWithRep *factorWithRep;
+    FactorWithNeg *factorWithNeg;
+    int lowerRep, upperRep;
+    Type type;
+
+    /* Priority descriptor for StarStar type. */
+    PriorDesc priorDescs[2];
+};
+
+/* Fifth level of precedence. Provides negation: either a negation applied
+ * to another FactorWithNeg or a plain Factor. */
+struct FactorWithNeg
+{
+    enum Type {
+        NegateType,
+        CharNegateType,
+        FactorType
+    };
+
+    /* Negation of the given kind applied to a nested FactorWithNeg. */
+    FactorWithNeg( const InputLoc &loc, FactorWithNeg *factorWithNeg, Type type) :
+        loc(loc), factorWithNeg(factorWithNeg), factor(0), type(type) { }
+
+    /* Plain wrapper around a Factor. No location is supplied, so loc is
+     * zeroed instead of being left indeterminate. */
+    FactorWithNeg( Factor *factor ) :
+        loc(), factorWithNeg(0), factor(factor), type(FactorType) { }
+
+    ~FactorWithNeg();
+
+    /* Tree traversal. */
+    FsmAp *walk( ParseData *pd );
+    void makeNameTree( ParseData *pd );
+    void resolveNameRefs( ParseData *pd );
+
+    InputLoc loc;
+    FactorWithNeg *factorWithNeg;
+    Factor *factor;
+    Type type;
+};
+
+/*
+ * Factor
+ *
+ * A tagged node: `type` names which one of the payload pointers is in use.
+ * Every constructor nulls the payload pointers it does not set and zeroes
+ * lower/upper, so no member is left indeterminate.
+ */
+struct Factor
+{
+    /* Language elements a factor node can be. */
+    enum Type {
+        LiteralType,
+        RangeType,
+        OrExprType,
+        RegExprType,
+        ReferenceType,
+        ParenType,
+        LongestMatchType
+    };
+
+    /* Construct with a literal fsm. */
+    Factor( Literal *literal ) :
+        loc(), literal(literal), range(0), reItem(0), regExpr(0),
+        varDef(0), join(0), longestMatch(0), lower(0), upper(0),
+        type(LiteralType) { }
+
+    /* Construct with a range. */
+    Factor( Range *range ) :
+        loc(), literal(0), range(range), reItem(0), regExpr(0),
+        varDef(0), join(0), longestMatch(0), lower(0), upper(0),
+        type(RangeType) { }
+
+    /* Construct with the or part of a regular expression. */
+    Factor( ReItem *reItem ) :
+        loc(), literal(0), range(0), reItem(reItem), regExpr(0),
+        varDef(0), join(0), longestMatch(0), lower(0), upper(0),
+        type(OrExprType) { }
+
+    /* Construct with a regular expression. */
+    Factor( RegExpr *regExpr ) :
+        loc(), literal(0), range(0), reItem(0), regExpr(regExpr),
+        varDef(0), join(0), longestMatch(0), lower(0), upper(0),
+        type(RegExprType) { }
+
+    /* Construct with a reference to a var def. */
+    Factor( const InputLoc &loc, VarDef *varDef ) :
+        loc(loc), literal(0), range(0), reItem(0), regExpr(0),
+        varDef(varDef), join(0), longestMatch(0), lower(0), upper(0),
+        type(ReferenceType) {}
+
+    /* Construct with a parenthesized join. */
+    Factor( Join *join ) :
+        loc(), literal(0), range(0), reItem(0), regExpr(0),
+        varDef(0), join(join), longestMatch(0), lower(0), upper(0),
+        type(ParenType) {}
+
+    /* Construct with a longest match operator. */
+    Factor( LongestMatch *longestMatch ) :
+        loc(), literal(0), range(0), reItem(0), regExpr(0),
+        varDef(0), join(0), longestMatch(longestMatch), lower(0), upper(0),
+        type(LongestMatchType) {}
+
+    /* Cleanup. */
+    ~Factor();
+
+    /* Tree traversal. */
+    FsmAp *walk( ParseData *pd );
+    void makeNameTree( ParseData *pd );
+    void resolveNameRefs( ParseData *pd );
+
+    InputLoc loc;
+    Literal *literal;
+    Range *range;
+    ReItem *reItem;
+    RegExpr *regExpr;
+    VarDef *varDef;
+    Join *join;
+    LongestMatch *longestMatch;
+    int lower, upper;
+    Type type;
+};
+
+/* A range machine, built from exactly two literals: the lower and the upper
+ * bound. */
+struct Range
+{
+    Range( Literal *lowerLit, Literal *upperLit )
+    :
+        lowerLit(lowerLit),
+        upperLit(upperLit)
+    { }
+
+    ~Range();
+
+    FsmAp *walk( ParseData *pd );
+
+    Literal *lowerLit;
+    Literal *upperLit;
+};
+
+/* A literal machine. The token is kept by value and `type` tells whether it
+ * is a number or a literal string. */
+struct Literal
+{
+    enum LiteralType { Number, LitString };
+
+    Literal( const Token &token, LiteralType type )
+    :
+        token(token),
+        type(type)
+    { }
+
+    FsmAp *walk( ParseData *pd );
+
+    Token token;
+    LiteralType type;
+};
+
+/* Regular expression. Either empty, or a nested regular expression followed
+ * by one more item (RecurseItem). */
+struct RegExpr
+{
+    enum RegExpType { RecurseItem, Empty };
+
+    /* Empty expression. Both pointers are nulled so they are never left
+     * indeterminate. */
+    RegExpr() :
+        regExpr(0), item(0),
+        type(Empty), caseInsensitive(false) { }
+
+    /* A nested expression followed by one item. */
+    RegExpr(RegExpr *regExpr, ReItem *item) :
+        regExpr(regExpr), item(item),
+        type(RecurseItem), caseInsensitive(false) { }
+
+    ~RegExpr();
+    FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
+
+    RegExpr *regExpr;
+    ReItem *item;
+    RegExpType type;
+    bool caseInsensitive;   /* false after construction */
+};
+
+/* An item in a regular expression: literal data, a dot, or a (negated) or
+ * block. Members a given constructor does not receive are zeroed. */
+struct ReItem
+{
+    enum ReItemType { Data, Dot, OrBlock, NegOrBlock };
+
+    /* Data item carrying a token; there is no or block. */
+    ReItem( const InputLoc &loc, const Token &token )
+        : loc(loc), token(token), orBlock(0), star(false), type(Data) { }
+
+    /* Item described by its type alone; neither token nor or block. */
+    ReItem( const InputLoc &loc, ReItemType type )
+        : loc(loc), token(), orBlock(0), star(false), type(type) { }
+
+    /* Item wrapping an or block; there is no token. */
+    ReItem( const InputLoc &loc, ReOrBlock *orBlock, ReItemType type )
+        : loc(loc), token(), orBlock(orBlock), star(false), type(type) { }
+
+    ~ReItem();
+    FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
+
+    InputLoc loc;
+    Token token;
+    ReOrBlock *orBlock;
+    bool star;              /* false after construction */
+    ReItemType type;
+};
+
+/* An or block. Either empty, or a nested or block followed by one more
+ * item (RecurseItem). */
+struct ReOrBlock
+{
+    enum ReOrBlockType { RecurseItem, Empty };
+
+    /* Empty block. Both pointers are nulled so they are never left
+     * indeterminate. */
+    ReOrBlock()
+        : orBlock(0), item(0), type(Empty) { }
+
+    /* A nested block followed by one item. */
+    ReOrBlock(ReOrBlock *orBlock, ReOrItem *item)
+        : orBlock(orBlock), item(item), type(RecurseItem) { }
+
+    ~ReOrBlock();
+    FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
+
+    ReOrBlock *orBlock;
+    ReOrItem *item;
+    ReOrBlockType type;
+};
+
+/* An item in an or block: either token data or a lower..upper character
+ * range. The members belonging to the other form are zeroed. */
+struct ReOrItem
+{
+    enum ReOrItemType { Data, Range };
+
+    /* Data item; the range bounds are unused and set to zero. */
+    ReOrItem( const InputLoc &loc, const Token &token )
+        : loc(loc), token(token), lower(0), upper(0), type(Data) {}
+
+    /* Range item; the token is unused and zeroed. */
+    ReOrItem( const InputLoc &loc, char lower, char upper )
+        : loc(loc), token(), lower(lower), upper(upper), type(Range) { }
+
+    FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
+
+    InputLoc loc;
+    Token token;
+    char lower;
+    char upper;
+    ReOrItemType type;
+};
+
+
+/*
+ * Inline code tree
+ */
+struct InlineList;
+/* One element of an inline code list. `type` says what kind of element it
+ * is; depending on the constructor used, it carries text data, a name
+ * reference, a resolved name target, or a longest match and its part.
+ * Every constructor nulls the pointers it is not given, including the
+ * prev/next list links. */
+struct InlineItem
+{
+    enum Type
+    {
+        Text, Goto, Call, Next, GotoExpr, CallExpr, NextExpr, Ret, PChar,
+        Char, Hold, Curs, Targs, Entry, Exec, LmSwitch, LmSetActId,
+        LmSetTokEnd, LmOnLast, LmOnNext, LmOnLagBehind, LmInitAct,
+        LmInitTokStart, LmSetTokStart, Break
+    };
+
+    /* Item carrying character data. */
+    InlineItem( const InputLoc &loc, char *data, Type type ) :
+        loc(loc), data(data), nameRef(0), nameTarg(0), children(0),
+        longestMatch(0), longestMatchPart(0), type(type),
+        prev(0), next(0) { }
+
+    /* Item carrying a not yet resolved name reference. */
+    InlineItem( const InputLoc &loc, NameRef *nameRef, Type type ) :
+        loc(loc), data(0), nameRef(nameRef), nameTarg(0), children(0),
+        longestMatch(0), longestMatchPart(0), type(type),
+        prev(0), next(0) { }
+
+    /* Item referring to a longest match and one of its parts. */
+    InlineItem( const InputLoc &loc, LongestMatch *longestMatch,
+            LongestMatchPart *longestMatchPart, Type type ) :
+        loc(loc), data(0), nameRef(0), nameTarg(0), children(0),
+        longestMatch(longestMatch), longestMatchPart(longestMatchPart),
+        type(type), prev(0), next(0) { }
+
+    /* Item carrying a name target. */
+    InlineItem( const InputLoc &loc, NameInst *nameTarg, Type type ) :
+        loc(loc), data(0), nameRef(0), nameTarg(nameTarg), children(0),
+        longestMatch(0), longestMatchPart(0), type(type),
+        prev(0), next(0) { }
+
+    /* Item described by its type alone. */
+    InlineItem( const InputLoc &loc, Type type ) :
+        loc(loc), data(0), nameRef(0), nameTarg(0), children(0),
+        longestMatch(0), longestMatchPart(0), type(type),
+        prev(0), next(0) { }
+
+    InputLoc loc;
+    char *data;
+    NameRef *nameRef;
+    NameInst *nameTarg;
+    InlineList *children;
+    LongestMatch *longestMatch;
+    LongestMatchPart *longestMatchPart;
+    Type type;
+
+    /* List links. */
+    InlineItem *prev, *next;
+};
+
+/* Normally this would be a typedef, but that would entail including DList
+ * from ptreetypes, which should contain just forward typedefs. */
+struct InlineList : public DList<InlineItem> { };
+
+
+
+#endif /* _PARSETREE_H */
diff --git a/contrib/tools/ragel5/ragel/ragel.h b/contrib/tools/ragel5/ragel/ragel.h
new file mode 100644
index 0000000000..736369c0ce
--- /dev/null
+++ b/contrib/tools/ragel5/ragel/ragel.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _RAGEL_H
+#define _RAGEL_H
+
+#include <stdio.h>
+#include <iostream>
+#include <fstream>
+#include <string>
+#include "config.h"
+
+#define PROGNAME "ragel"
+
+/* To what degree machines are minimized.
+ * NOTE(review): the enumerators presumably name the minimization algorithm
+ * to use -- confirm against the code that reads minimizeLevel. */
+enum MinimizeLevel {
+ MinimizeApprox,
+ MinimizeStable,
+ MinimizePartition1,
+ MinimizePartition2
+};
+
+/* Minimization option, stored in the global minimizeOpt.
+ * NOTE(review): the names suggest this selects when minimization is run
+ * (never, at the end, after most operations, after every operation) --
+ * confirm against the code that reads minimizeOpt. */
+enum MinimizeOpt {
+ MinimizeNone,
+ MinimizeEnd,
+ MinimizeMostOps,
+ MinimizeEveryOp
+};
+
+/* Options. Declared here only; the definitions live in another translation
+ * unit. */
+extern MinimizeLevel minimizeLevel;
+extern MinimizeOpt minimizeOpt;
+extern char *machineSpec, *machineName;
+extern bool printStatistics;
+
+/* Further shared globals, also defined elsewhere.
+ * NOTE(review): gblErrorCount is presumably the running count of reported
+ * errors -- confirm where it is incremented. */
+extern int gblErrorCount;
+extern char mainMachine[];
+
+/* Location in an input file: file name, line and column. Plain data with
+ * no constructor; it is embedded by value in Token and in the parse tree
+ * nodes, so fields hold whatever the creator assigned. */
+struct InputLoc
+{
+ const char *fileName;
+ int line;
+ int col;
+};
+
+/* Error reporting. Each function returns a stream reference; the two
+ * overloads taking an InputLoc receive the location the message is about.
+ * NOTE(review): presumably the caller writes the message text to the
+ * returned stream -- confirm against the definitions. */
+std::ostream &error();
+std::ostream &error( const InputLoc &loc );
+std::ostream &warning( const InputLoc &loc );
+
+/* Declared here, defined elsewhere. writeMachines takes hostData by value,
+ * so the string is copied on every call. */
+void terminateAllParsers( );
+void writeMachines( std::ostream &out, std::string hostData, const char *inputFileName );
+void xmlEscapeHost( std::ostream &out, char *data, int len );
+
+#endif /* _RAGEL_H */
diff --git a/contrib/tools/ragel5/ragel/rlparse.cpp b/contrib/tools/ragel5/ragel/rlparse.cpp
new file mode 100644
index 0000000000..cd6fbde218
--- /dev/null
+++ b/contrib/tools/ragel5/ragel/rlparse.cpp
@@ -0,0 +1,6088 @@
+/* Automatically generated by Kelbt from "rlparse.kl".
+ *
+ * Parts of this file are copied from Kelbt source covered by the GNU
+ * GPL. As a special exception, you may use the parts of this file copied
+ * from Kelbt source without restriction. The remainder is derived from
+ * "rlparse.kl" and inherits the copyright status of that file.
+ */
+
+#line 1 "rlparse.kl"
+/*
+ * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "rlparse.h"
+#include "ragel.h"
+#include <iostream>
+#include <errno.h>
+
+#include <stdlib.h>
+//#include <malloc.h>
+
+using std::cout;
+using std::cerr;
+using std::endl;
+
+ParserDict parserDict;
+
+#line 93 "rlparse.kh"
+#line 96 "rlparse.kh"
+#line 126 "rlparse.kh"
+#line 1370 "rlparse.kl"
+
+
+#line 50 "rlparse.cpp"
+struct Parser_Lel_action_ref
+{
+#line 682 "rlparse.kl"
+
+ Action *action;
+
+
+#line 57 "rlparse.cpp"
+};
+
+struct Parser_Lel_aug_type
+{
+#line 475 "rlparse.kl"
+
+ InputLoc loc;
+ AugType augType;
+
+
+#line 68 "rlparse.cpp"
+};
+
+struct Parser_Lel_expression
+{
+#line 297 "rlparse.kl"
+
+ Expression *expression;
+
+
+#line 78 "rlparse.cpp"
+};
+
+struct Parser_Lel_factor
+{
+#line 907 "rlparse.kl"
+
+ Factor *factor;
+
+
+#line 88 "rlparse.cpp"
+};
+
+struct Parser_Lel_factor_rep_num
+{
+#line 861 "rlparse.kl"
+
+ int rep;
+
+
+#line 98 "rlparse.cpp"
+};
+
+struct Parser_Lel_factor_with_aug
+{
+#line 392 "rlparse.kl"
+
+ FactorWithAug *factorWithAug;
+
+
+#line 108 "rlparse.cpp"
+};
+
+struct Parser_Lel_factor_with_ep
+{
+#line 376 "rlparse.kl"
+
+ FactorWithAug *factorWithAug;
+
+
+#line 118 "rlparse.cpp"
+};
+
+struct Parser_Lel_factor_with_label
+{
+#line 360 "rlparse.kl"
+
+ FactorWithAug *factorWithAug;
+
+
+#line 128 "rlparse.cpp"
+};
+
+struct Parser_Lel_factor_with_neg
+{
+#line 887 "rlparse.kl"
+
+ FactorWithNeg *factorWithNeg;
+
+
+#line 138 "rlparse.cpp"
+};
+
+struct Parser_Lel_factor_with_rep
+{
+#line 811 "rlparse.kl"
+
+ FactorWithRep *factorWithRep;
+
+
+#line 148 "rlparse.cpp"
+};
+
+struct Parser_Lel_inline_item
+{
+#line 1160 "rlparse.kl"
+
+ InlineItem *inlineItem;
+
+
+#line 158 "rlparse.cpp"
+};
+
+struct Parser_Lel_inline_list
+{
+#line 1139 "rlparse.kl"
+
+ InlineList *inlineList;
+
+
+#line 168 "rlparse.cpp"
+};
+
+struct Parser_Lel_join
+{
+#line 281 "rlparse.kl"
+
+ Join *join;
+
+
+#line 178 "rlparse.cpp"
+};
+
+struct Parser_Lel_join_or_lm
+{
+#line 204 "rlparse.kl"
+
+ JoinOrLm *joinOrLm;
+
+
+#line 188 "rlparse.cpp"
+};
+
+struct Parser_Lel_lm_part_list
+{
+#line 224 "rlparse.kl"
+
+ LmPartList *lmPartList;
+
+
+#line 198 "rlparse.cpp"
+};
+
+struct Parser_Lel_local_err_name
+{
+#line 790 "rlparse.kl"
+
+ int error_name;
+
+
+#line 208 "rlparse.cpp"
+};
+
+struct Parser_Lel_longest_match_part
+{
+#line 243 "rlparse.kl"
+
+ LongestMatchPart *lmPart;
+
+
+#line 218 "rlparse.cpp"
+};
+
+struct Parser_Lel_opt_export
+{
+#line 64 "rlparse.kl"
+
+ bool isSet;
+
+
+#line 228 "rlparse.cpp"
+};
+
+struct Parser_Lel_opt_lm_part_action
+{
+#line 262 "rlparse.kl"
+
+ Action *action;
+
+
+#line 238 "rlparse.cpp"
+};
+
+struct Parser_Lel_priority_aug
+{
+#line 741 "rlparse.kl"
+
+ int priorityNum;
+
+
+#line 248 "rlparse.cpp"
+};
+
+struct Parser_Lel_priority_name
+{
+#line 723 "rlparse.kl"
+
+ int priorityName;
+
+
+#line 258 "rlparse.cpp"
+};
+
+struct Parser_Lel_range_lit
+{
+#line 975 "rlparse.kl"
+
+ Literal *literal;
+
+
+#line 268 "rlparse.cpp"
+};
+
+struct Parser_Lel_regular_expr
+{
+#line 1013 "rlparse.kl"
+
+ RegExpr *regExpr;
+
+
+#line 278 "rlparse.cpp"
+};
+
+struct Parser_Lel_regular_expr_char
+{
+#line 1062 "rlparse.kl"
+
+ ReItem *reItem;
+
+
+#line 288 "rlparse.cpp"
+};
+
+struct Parser_Lel_regular_expr_item
+{
+#line 1046 "rlparse.kl"
+
+ ReItem *reItem;
+
+
+#line 298 "rlparse.cpp"
+};
+
+struct Parser_Lel_regular_expr_or_char
+{
+#line 1121 "rlparse.kl"
+
+ ReOrItem *reOrItem;
+
+
+#line 308 "rlparse.cpp"
+};
+
+struct Parser_Lel_regular_expr_or_data
+{
+#line 1088 "rlparse.kl"
+
+ ReOrBlock *reOrBlock;
+
+
+#line 318 "rlparse.cpp"
+};
+
+struct Parser_Lel_term
+{
+#line 329 "rlparse.kl"
+
+ Term *term;
+
+
+#line 328 "rlparse.cpp"
+};
+
+struct Parser_Lel_token_type
+{
+#line 104 "rlparse.kl"
+
+ Token token;
+
+
+#line 338 "rlparse.cpp"
+};
+
+union Parser_UserData
+{
+ struct Parser_Lel_action_ref action_ref;
+ struct Parser_Lel_aug_type aug_type;
+ struct Parser_Lel_expression expression;
+ struct Parser_Lel_factor factor;
+ struct Parser_Lel_factor_rep_num factor_rep_num;
+ struct Parser_Lel_factor_with_aug factor_with_aug;
+ struct Parser_Lel_factor_with_ep factor_with_ep;
+ struct Parser_Lel_factor_with_label factor_with_label;
+ struct Parser_Lel_factor_with_neg factor_with_neg;
+ struct Parser_Lel_factor_with_rep factor_with_rep;
+ struct Parser_Lel_inline_item inline_item;
+ struct Parser_Lel_inline_list inline_list;
+ struct Parser_Lel_join join;
+ struct Parser_Lel_join_or_lm join_or_lm;
+ struct Parser_Lel_lm_part_list lm_part_list;
+ struct Parser_Lel_local_err_name local_err_name;
+ struct Parser_Lel_longest_match_part longest_match_part;
+ struct Parser_Lel_opt_export opt_export;
+ struct Parser_Lel_opt_lm_part_action opt_lm_part_action;
+ struct Parser_Lel_priority_aug priority_aug;
+ struct Parser_Lel_priority_name priority_name;
+ struct Parser_Lel_range_lit range_lit;
+ struct Parser_Lel_regular_expr regular_expr;
+ struct Parser_Lel_regular_expr_char regular_expr_char;
+ struct Parser_Lel_regular_expr_item regular_expr_item;
+ struct Parser_Lel_regular_expr_or_char regular_expr_or_char;
+ struct Parser_Lel_regular_expr_or_data regular_expr_or_data;
+ struct Parser_Lel_term term;
+ struct Parser_Lel_token_type token_type;
+ struct Token token;
+};
+
+struct Parser_LangEl
+{
+ char *file;
+ int line;
+ int type;
+ int reduction;
+ int state;
+ union Parser_UserData user;
+ unsigned int retry;
+ struct Parser_LangEl *next, *child;
+};
+
+#line 388 "rlparse.cpp"
+unsigned int Parser_startState = 0;
+
+short Parser_indicies[] = {
+ 151, -1, -1, -1, -1, -1, 151, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 151, 151, 151, 151, -1, -1,
+ -1, -1, -1, -1, 151, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 151, 151, -1, 151, 1, 0, 393,
+ 153, -1, -1, -1, -1, -1, 153, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 153, 153, 153, 153, -1, -1,
+ -1, -1, -1, -1, 153, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 153, 153, -1, 149, -1, -1, 2,
+ 157, -1, -1, -1, -1, -1, 150, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 4, 5, 6, 7, -1, -1,
+ -1, -1, -1, -1, 154, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 9, 8, -1, -1, -1, -1, -1,
+ 152, 384, 385, 386, 387, 388, 389, 390,
+ 391, 392, 10, 3, 161, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 24, 11, 12, 14, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 318, 320, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 13, 356, 356, 356, -1, 356,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 356, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 356,
+ -1, -1, -1, -1, -1, -1, 356, 356,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 356, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 356, 356, 356,
+ 356, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 356,
+ 356, -1, -1, -1, 356, 356, 356, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 20, 356, 356, 356, -1, 356, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 356, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 356, -1, -1,
+ -1, -1, -1, -1, 356, 356, -1, -1,
+ -1, -1, -1, -1, -1, -1, 356, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 356, 356, 356, 356, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 356, 356, -1,
+ -1, -1, 356, 356, 356, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 22, 170,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 170, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 21, 23, -1, -1, -1, -1, -1,
+ -1, -1, -1, 155, 25, 164, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 26, 14,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 318, 320, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 27, 319,
+ 368, 369, 370, -1, 367, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 166, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 366, -1, -1, -1,
+ -1, -1, -1, 364, 365, -1, -1, -1,
+ -1, -1, -1, -1, -1, 371, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 360, 361, 362, 363, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 372, 373, -1, -1,
+ -1, 374, 375, 28, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 357, -1, 359, -1, 355, 358,
+ 29, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 169, 368,
+ 369, 370, -1, 367, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 167, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 366, -1, -1, -1, -1,
+ -1, -1, 364, 365, -1, -1, -1, -1,
+ -1, -1, -1, -1, 371, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 360, 361, 362, 363, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 372, 373, -1, -1, -1,
+ 374, 375, 28, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 357, -1, 359, -1, 355, 358, 153,
+ -1, -1, -1, -1, -1, -1, 153, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 153, 153, 153, 153, -1, -1, -1, -1,
+ -1, -1, 153, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 153,
+ 153, -1, -1, -1, -1, 30, 31, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 32, 334, 334, 334, -1, 334,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 334, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 334, -1, -1, -1, -1, -1, -1, 334,
+ -1, -1, -1, -1, -1, -1, 334, 334,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 334, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 334, 334, 334,
+ 334, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 334, 334, 334, 334,
+ 334, 334, 334, 334, 334, 334, 334, 334,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 33, 163,
+ 165, 34, 356, 356, 356, -1, 356, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 356, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 356, -1,
+ -1, -1, -1, -1, -1, 356, 356, -1,
+ -1, -1, -1, -1, -1, -1, -1, 356,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 356, 356, 356, 356,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 356, 356,
+ -1, -1, -1, 356, 356, 356, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 35,
+ 158, -1, -1, -1, -1, -1, -1, 157,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 4, 5, 6, 7, -1, -1, -1,
+ -1, -1, -1, 154, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 9, 8, -1, -1, -1, -1, -1, 152,
+ 384, 385, 386, 387, 388, 389, 390, 391,
+ 392, 10, 3, 44, -1, -1, -1, -1,
+ -1, -1, 52, -1, -1, -1, -1, 14,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 45, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 39, 46,
+ -1, -1, -1, -1, -1, 318, 320, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 50, 48, 49, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 36, -1, -1, 47, -1,
+ -1, -1, -1, -1, -1, -1, 37, 38,
+ 193, 41, -1, 42, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 43, -1,
+ -1, -1, 300, 304, -1, -1, 51, 44,
+ -1, -1, -1, -1, -1, -1, 52, -1,
+ -1, -1, -1, 14, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 45, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 39, 46, -1, -1, -1, -1,
+ -1, 318, 320, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 55, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 50, 48, 49, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 54,
+ 53, -1, 47, -1, -1, -1, -1, -1,
+ -1, -1, 37, 38, 193, 41, -1, 42,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 43, -1, -1, -1, 300, 304,
+ -1, -1, 51, 340, 341, 342, -1, 338,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 339, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 162, -1, -1, -1, -1, -1, -1, 366,
+ -1, -1, -1, -1, -1, -1, 364, 365,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 343, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 360, 361, 362,
+ 363, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 62, 57, 56, 372,
+ 373, 58, 60, 61, 374, 375, 28, 59,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 333, 337, 335, 336, 344,
+ 381, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 380, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 63, -1, -1, -1, -1, 64, 368,
+ 369, 370, -1, 367, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 168, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 366, -1, -1, -1, -1,
+ -1, -1, 364, 365, -1, -1, -1, -1,
+ -1, -1, -1, -1, 371, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 360, 361, 362, 363, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 372, 373, -1, -1, -1,
+ 374, 375, 28, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 357, -1, 359, -1, 355, 358, 70,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 159, 72,
+ -1, -1, 182, -1, -1, 182, 73, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 182, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 182, 71, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 182, -1, -1, -1,
+ 74, 44, -1, -1, -1, -1, 187, -1,
+ 52, 187, -1, -1, 187, 19, 75, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 187, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 45, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 187, 187, -1, -1, -1,
+ -1, -1, -1, -1, 39, 46, -1, -1,
+ -1, -1, -1, 318, 320, -1, -1, 76,
+ 77, 78, -1, 187, -1, -1, -1, 187,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 50, 48, 49, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 47, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 188, 41,
+ -1, 42, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 43, -1, -1, -1,
+ 300, 304, -1, -1, 51, 307, -1, -1,
+ 307, 307, 307, -1, 307, 307, 307, 307,
+ 307, 307, 307, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 66, 307,
+ 307, -1, 307, 307, 307, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 307, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 307,
+ 307, -1, -1, -1, -1, -1, -1, -1,
+ 307, 307, -1, -1, -1, -1, -1, 307,
+ 307, -1, -1, 307, 307, 307, 307, 307,
+ 307, -1, -1, 307, 307, 307, 307, 307,
+ 307, 307, 307, 307, 307, 307, 307, 307,
+ 307, 307, 307, 307, 307, 307, 307, 307,
+ 307, 307, 307, 307, 307, 307, 307, 307,
+ 307, 307, 307, 307, 307, 307, 307, 307,
+ 307, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 307, 195,
+ -1, -1, -1, -1, 195, -1, 195, 195,
+ -1, -1, 195, 195, 195, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 195, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 195, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 195, 195, -1, -1, -1, -1, -1,
+ -1, -1, 195, 195, -1, -1, -1, -1,
+ -1, 195, 195, -1, -1, 195, 195, 195,
+ 79, 195, -1, -1, -1, 195, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 195, 195, 195, 197, -1, -1, 89, 88,
+ 197, -1, 197, 197, -1, -1, 197, 197,
+ 197, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 197, 91, -1,
+ 90, -1, 87, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 197, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 197, 197, -1,
+ -1, -1, -1, -1, -1, -1, 197, 197,
+ -1, -1, -1, -1, -1, 197, 197, -1,
+ -1, 197, 197, 197, 197, 197, -1, -1,
+ -1, 197, 213, 215, 217, 92, 256, 260,
+ 262, 264, 258, 266, 268, 272, 274, 276,
+ 270, 278, 244, 248, 250, 252, 246, 254,
+ 220, 224, 226, 228, 222, 230, 232, 236,
+ 238, 240, 234, 242, 197, 197, 197, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 219, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 80, -1, -1, 81,
+ 82, 83, 84, 85, 86, 208, -1, -1,
+ 208, 208, 208, -1, 208, 208, 292, 295,
+ 208, 208, 208, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 208,
+ 208, -1, 208, 294, 208, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 208, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 93,
+ 208, -1, -1, -1, -1, -1, -1, -1,
+ 208, 208, -1, -1, -1, -1, -1, 208,
+ 208, -1, -1, 208, 208, 208, 208, 208,
+ 293, -1, -1, 208, 208, 208, 208, 208,
+ 208, 208, 208, 208, 208, 208, 208, 208,
+ 208, 208, 208, 208, 208, 208, 208, 208,
+ 208, 208, 208, 208, 208, 208, 208, 208,
+ 208, 208, 208, 208, 208, 208, 208, 208,
+ 208, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 208, 44,
+ -1, -1, -1, -1, -1, -1, 52, -1,
+ -1, -1, -1, 14, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 45, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 308, 46, -1, -1, -1, -1,
+ -1, 318, 320, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 50, 48, 49, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 47, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 302, 304,
+ -1, -1, 51, 44, -1, -1, -1, -1,
+ -1, -1, 52, -1, -1, -1, -1, 14,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 45, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 308, 46,
+ -1, -1, -1, -1, -1, 318, 320, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 50, 48, 49, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 47, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 303, 304, -1, -1, 51, 305,
+ -1, -1, 305, 305, 305, -1, 305, 305,
+ 305, 305, 305, 305, 305, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 305, 305, -1, 305, 305, 305, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 305, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 305, 305, -1, -1, -1, -1, -1,
+ -1, -1, 305, 305, -1, -1, -1, -1,
+ -1, 305, 305, -1, 314, 305, 305, 305,
+ 305, 305, 305, -1, -1, 305, 305, 305,
+ 305, 305, 305, 305, 305, 305, 305, 305,
+ 305, 305, 305, 305, 305, 305, 305, 305,
+ 305, 305, 305, 305, 305, 305, 305, 305,
+ 305, 305, 305, 305, 305, 305, 305, 305,
+ 305, 305, 305, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 305, 306, -1, -1, 306, 306, 306, -1,
+ 306, 306, 306, 306, 306, 306, 306, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 306, 306, -1, 306, 306,
+ 306, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 306, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 306, 306, -1, -1, -1,
+ -1, -1, -1, -1, 306, 306, -1, -1,
+ -1, -1, -1, 306, 306, -1, 316, 306,
+ 306, 306, 306, 306, 306, -1, -1, 306,
+ 306, 306, 306, 306, 306, 306, 306, 306,
+ 306, 306, 306, 306, 306, 306, 306, 306,
+ 306, 306, 306, 306, 306, 306, 306, 306,
+ 306, 306, 306, 306, 306, 306, 306, 306,
+ 306, 306, 306, 306, 306, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 306, 330, -1, -1, -1, 330,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 68, 330, -1, -1, -1, 330, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 69, 322,
+ 322, 322, -1, 322, -1, -1, 322, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 67, 94, 44, -1, -1, -1, -1, -1,
+ -1, 52, -1, -1, -1, -1, 14, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 45,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 39, 46, -1,
+ -1, -1, -1, -1, 318, 320, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 50, 48, 49, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 65, -1, -1, 47, -1, -1,
+ -1, -1, -1, -1, -1, 37, 38, 193,
+ 41, -1, 42, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 43, -1, -1,
+ -1, 300, 304, -1, -1, 51, 160, 70,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 171, 44,
+ -1, -1, -1, -1, -1, -1, 52, -1,
+ -1, -1, -1, 14, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 45, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 40, 46, -1, -1, -1, -1,
+ -1, 318, 320, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 50, 48, 49, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 4, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 154, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 176, -1, 175, -1, -1,
+ -1, -1, -1, -1, 156, 97, -1, 96,
+ -1, -1, 47, -1, -1, 95, 174, -1,
+ -1, -1, 37, 38, 193, 41, -1, 42,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 43, -1, -1, -1, 300, 304,
+ -1, -1, 51, 345, 356, 356, 356, -1,
+ 356, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 356,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 356, -1, -1, -1, -1, -1, -1, 356,
+ 356, -1, -1, -1, -1, -1, -1, -1,
+ -1, 356, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 356, 356,
+ 356, 356, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 356, 356, -1, -1, -1, 356, 356, 356,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 98, 100, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 381, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 380, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 99, -1, -1,
+ -1, -1, 64, 104, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 381, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 380, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 103, -1,
+ -1, -1, -1, 64, 102, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 381, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 380,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 101,
+ -1, -1, -1, -1, 64, 353, 354, 376,
+ 383, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 105, 313, -1,
+ -1, 70, 44, -1, -1, -1, -1, -1,
+ -1, 52, -1, -1, -1, -1, 14, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 45,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 39, 46, -1,
+ -1, -1, -1, -1, 318, 320, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 50, 48, 49, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 47, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 194,
+ 41, -1, 42, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 43, -1, -1,
+ -1, 300, 304, -1, -1, 51, 311, 107,
+ 108, -1, 327, -1, -1, 328, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 321, 106, 309, -1, -1, -1, 109,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 329, 310, -1,
+ -1, -1, 109, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 329, 44, -1, -1, -1, -1, -1, -1,
+ 52, -1, -1, -1, -1, 14, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 45, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 39, 46, -1, -1,
+ -1, -1, -1, 318, 320, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 50, 48, 49, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 47, -1, -1, -1,
+ -1, -1, -1, -1, 110, 38, 193, 41,
+ -1, 42, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 43, -1, -1, -1,
+ 300, 304, -1, -1, 51, 44, -1, -1,
+ -1, -1, -1, -1, 52, -1, -1, -1,
+ -1, 14, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 45, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 39, 46, -1, -1, -1, -1, -1, 318,
+ 320, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 50, 48,
+ 49, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 47, -1, -1, -1, -1, -1, -1, -1,
+ -1, 113, 193, 41, -1, 42, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 43, -1, -1, -1, 300, 304, -1, -1,
+ 51, 44, -1, -1, -1, -1, -1, -1,
+ 52, -1, -1, -1, -1, 14, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 45, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 39, 46, -1, -1,
+ -1, -1, -1, 318, 320, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 50, 48, 49, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 47, -1, -1, -1,
+ -1, -1, -1, -1, -1, 111, 193, 41,
+ -1, 42, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 43, -1, -1, -1,
+ 300, 304, -1, -1, 51, 44, -1, -1,
+ -1, -1, -1, -1, 52, -1, -1, -1,
+ -1, 14, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 45, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 39, 46, -1, -1, -1, -1, -1, 318,
+ 320, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 50, 48,
+ 49, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 47, -1, -1, -1, -1, -1, -1, -1,
+ -1, 112, 193, 41, -1, 42, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 43, -1, -1, -1, 300, 304, -1, -1,
+ 51, 44, -1, -1, -1, -1, -1, -1,
+ 52, -1, -1, -1, -1, 14, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 45, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 39, 46, -1, -1,
+ -1, -1, -1, 318, 320, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 50, 48, 49, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 47, -1, -1, -1,
+ -1, -1, -1, -1, -1, 114, 193, 41,
+ -1, 42, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 43, -1, -1, -1,
+ 300, 304, -1, -1, 51, 44, -1, -1,
+ -1, -1, -1, -1, 52, -1, -1, -1,
+ -1, 14, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 45, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 39, 46, -1, -1, -1, -1, -1, 318,
+ 320, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 50, 48,
+ 49, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 47, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 189, 41, -1, 42, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 43, -1, -1, -1, 300, 304, -1, -1,
+ 51, 44, -1, -1, -1, -1, -1, -1,
+ 52, -1, -1, -1, -1, 14, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 45, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 39, 46, -1, -1,
+ -1, -1, -1, 318, 320, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 50, 48, 49, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 47, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 190, 41,
+ -1, 42, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 43, -1, -1, -1,
+ 300, 304, -1, -1, 51, 44, -1, -1,
+ -1, -1, -1, -1, 52, -1, -1, -1,
+ -1, 14, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 45, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 39, 46, -1, -1, -1, -1, -1, 318,
+ 320, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 50, 48,
+ 49, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 47, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 191, 41, -1, 42, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 43, -1, -1, -1, 300, 304, -1, -1,
+ 51, 44, -1, -1, -1, -1, -1, -1,
+ 52, -1, -1, -1, -1, 14, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 45, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 39, 46, -1, -1,
+ -1, -1, -1, 318, 320, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 50, 48, 49, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 47, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 192, 41,
+ -1, 42, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 43, -1, -1, -1,
+ 300, 304, -1, -1, 51, 378, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 196, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 115, 116, -1, -1, 118, -1, 119,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 117, -1, -1,
+ -1, -1, -1, -1, -1, -1, 284, -1,
+ -1, -1, -1, -1, -1, 288, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 198, 282, -1, -1,
+ -1, -1, -1, -1, -1, 199, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 280,
+ 287, 120, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 117, -1, -1, -1,
+ -1, -1, -1, -1, -1, 284, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 201, 282, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 280, 120,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 117, -1, -1, -1, -1, -1,
+ -1, -1, -1, 284, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 202, 282, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 280, 120, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 117, -1, -1, -1, -1, -1, -1, -1,
+ -1, 284, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 203,
+ 282, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 280, 120, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 117, -1,
+ -1, -1, -1, -1, -1, -1, -1, 284,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 204, 282, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 280, 120, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 117, -1, -1, -1,
+ -1, -1, -1, -1, -1, 284, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 205, 282, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 280, 121,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 117, -1, -1, -1, -1, -1,
+ -1, -1, -1, 284, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 206, 282, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 280, 209, -1, -1,
+ 209, -1, 209, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 209, -1, -1, -1, -1, -1, -1, -1,
+ -1, 209, -1, -1, -1, -1, -1, -1,
+ 209, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 253, 265, 277, 229, 241, 210, -1, -1,
+ 210, -1, 210, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 210, -1, -1, -1, -1, -1, -1, -1,
+ -1, 210, -1, -1, -1, -1, -1, -1,
+ 210, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 218,
+ 251, 263, 275, 227, 239, 211, -1, -1,
+ 211, -1, 211, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 211, -1, -1, -1, -1, -1, -1, -1,
+ -1, 211, -1, -1, -1, -1, -1, -1,
+ 211, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 216,
+ 249, 261, 273, 225, 237, 212, -1, -1,
+ 212, -1, 212, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 212, -1, -1, -1, -1, -1, -1, -1,
+ -1, 212, -1, -1, -1, -1, -1, -1,
+ 212, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 214,
+ 245, 257, 269, 221, 233, 247, 259, 271,
+ 223, 235, 255, 267, 279, 231, 243, 123,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 301, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 122, 14, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 315, -1, -1, -1, -1,
+ -1, 318, 320, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 317, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 312, 44, -1, -1, -1, -1,
+ -1, -1, 52, -1, 127, -1, -1, 14,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 45, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 40, 46,
+ -1, -1, -1, -1, -1, 318, 320, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 50, 48, 49, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 4, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 154, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 176,
+ -1, 175, -1, -1, -1, -1, -1, -1,
+ 156, 97, -1, 96, -1, -1, 47, -1,
+ -1, -1, 173, -1, -1, -1, 37, 38,
+ 193, 41, -1, 42, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 43, -1,
+ -1, -1, 300, 304, -1, -1, 51, 70,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 180, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 117, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 126, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 125, -1, 179, 161,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 124, 368, 369, 370, -1,
+ 367, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 346,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 366, -1, -1, -1, -1, -1, -1, 364,
+ 365, -1, -1, -1, -1, -1, -1, -1,
+ -1, 371, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 360, 361,
+ 362, 363, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 372, 373, -1, -1, -1, 374, 375, 28,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 357, -1,
+ 359, -1, 355, 358, 347, 356, 356, 356,
+ -1, 356, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 356, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 356, -1, -1, -1, -1, -1, -1,
+ 356, 356, -1, -1, -1, -1, -1, -1,
+ -1, -1, 356, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 356,
+ 356, 356, 356, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 356, 356, -1, -1, -1, 356, 356,
+ 356, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 128, 351, 356, 356, 356, -1,
+ 356, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 356,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 356, -1, -1, -1, -1, -1, -1, 356,
+ 356, -1, -1, -1, -1, -1, -1, -1,
+ -1, 356, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 356, 356,
+ 356, 356, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 356, 356, -1, -1, -1, 356, 356, 356,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 129, 349, 356, 356, 356, -1, 356,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 356, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 356,
+ -1, -1, -1, -1, -1, -1, 356, 356,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 356, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 356, 356, 356,
+ 356, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 356,
+ 356, -1, -1, -1, 356, 356, 356, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 130, 379, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 379, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 131, 324, 324,
+ 324, -1, 324, 323, -1, 324, 330, -1,
+ -1, -1, 330, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 132, 330, -1, -1, -1,
+ 330, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 133, 331, -1, -1, 134, 331, 72,
+ -1, -1, 181, -1, -1, 181, 73, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 181, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 181, 71, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 181, -1, -1, -1,
+ 74, 44, -1, -1, -1, -1, 184, -1,
+ 52, 184, -1, -1, 184, 16, 75, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 184, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 45, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 184, 184, -1, -1, -1,
+ -1, -1, -1, -1, 39, 46, -1, -1,
+ -1, -1, -1, 318, 320, -1, -1, 76,
+ 77, 78, -1, 184, -1, -1, -1, 184,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 50, 48, 49, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 47, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 188, 41,
+ -1, 42, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 43, -1, -1, -1,
+ 300, 304, -1, -1, 51, 44, -1, -1,
+ -1, -1, 185, -1, 52, 185, -1, -1,
+ 185, 17, 75, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 185,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 45, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 185,
+ 185, -1, -1, -1, -1, -1, -1, -1,
+ 39, 46, -1, -1, -1, -1, -1, 318,
+ 320, -1, -1, 76, 77, 78, -1, 185,
+ -1, -1, -1, 185, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 50, 48,
+ 49, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 47, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 188, 41, -1, 42, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 43, -1, -1, -1, 300, 304, -1, -1,
+ 51, 44, -1, -1, -1, -1, 183, -1,
+ 52, 183, -1, -1, 183, 15, 75, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 183, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 45, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 183, 183, -1, -1, -1,
+ -1, -1, -1, -1, 39, 46, -1, -1,
+ -1, -1, -1, 318, 320, -1, -1, 76,
+ 77, 78, -1, 183, -1, -1, -1, 183,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 50, 48, 49, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 47, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 188, 41,
+ -1, 42, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 43, -1, -1, -1,
+ 300, 304, -1, -1, 51, 44, -1, -1,
+ -1, -1, 186, -1, 52, 186, -1, -1,
+ 186, 18, 75, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 186,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 45, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 186,
+ 186, -1, -1, -1, -1, -1, -1, -1,
+ 39, 46, -1, -1, -1, -1, -1, 318,
+ 320, -1, -1, 76, 77, 78, -1, 186,
+ -1, -1, -1, 186, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 50, 48,
+ 49, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 47, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 188, 41, -1, 42, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 43, -1, -1, -1, 300, 304, -1, -1,
+ 51, 383, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 135, 138,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 136,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 137, 334, 334, 334, -1, 334, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 334, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 334, -1,
+ -1, -1, -1, -1, -1, 334, -1, -1,
+ -1, -1, -1, -1, 334, 334, -1, -1,
+ -1, -1, -1, -1, -1, -1, 334, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 334, 334, 334, 334, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 334, 334, 334, 334, 334, 334,
+ 334, 334, 334, 334, 334, 334, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 139, 289, 290, 284,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 137, 141, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 140, -1, 137, 143, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 296, 301, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 142, 31, 177, 120, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 117,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 284, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 178, 282,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 280, 172, 368, 369, 370, -1, 367,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 348, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 366,
+ -1, -1, -1, -1, -1, -1, 364, 365,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 371, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 360, 361, 362,
+ 363, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 372,
+ 373, -1, -1, -1, 374, 375, 28, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 357, -1, 359,
+ -1, 355, 358, 368, 369, 370, -1, 367,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 352, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 366,
+ -1, -1, -1, -1, -1, -1, 364, 365,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 371, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 360, 361, 362,
+ 363, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 372,
+ 373, -1, -1, -1, 374, 375, 28, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 357, -1, 359,
+ -1, 355, 358, 368, 369, 370, -1, 367,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 350, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 366,
+ -1, -1, -1, -1, -1, -1, 364, 365,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 371, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 360, 361, 362,
+ 363, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 372,
+ 373, -1, -1, -1, 374, 375, 28, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 357, -1, 359,
+ -1, 355, 358, 382, 325, -1, -1, -1,
+ 109, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 329, 326,
+ -1, -1, -1, 109, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 329, 332, 377, -1, -1, -1, -1,
+ 377, -1, 377, 377, -1, -1, 377, 377,
+ 377, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 377, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 377, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 377, 377, -1,
+ -1, -1, -1, -1, -1, -1, 377, 377,
+ -1, -1, -1, -1, -1, 377, 377, -1,
+ -1, 377, 377, 377, 377, 377, -1, 131,
+ -1, 377, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 377, 377, 377, 144,
+ 281, 283, -1, -1, 286, 340, 341, 342,
+ -1, 338, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 339, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 285, -1, -1, -1, -1, -1,
+ -1, 366, -1, -1, -1, -1, -1, -1,
+ 364, 365, -1, -1, -1, -1, -1, -1,
+ -1, -1, 343, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 360,
+ 361, 362, 363, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 62, 57,
+ 56, 372, 373, 58, 60, 61, 374, 375,
+ 28, 59, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 333, 337, 335,
+ 336, 344, 145, 283, -1, -1, 291, 297,
+ 298, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 301, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 146, 118, -1, 119, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 288, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 147, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 287, 120,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 117, -1, -1, -1, -1, -1,
+ -1, -1, -1, 284, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 148, 282, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 280, 299, 200, 207,
+
+};
+
+unsigned short Parser_keys[] = { /* Accepted element-type range per parser state.
+ * Stored as consecutive (low, high) pairs: entry 2*s is the lowest and entry
+ * 2*s+1 the highest type id valid in state s (both inclusive).
+ * Parser::parseLangEl() jumps to its parseError label for a type outside
+ * that range and subtracts the low bound when indexing Parser_indicies. */
+ 132, 226, 224, 224, 132, 227, 132, 239,
+ 132, 240, 132, 132, 132, 132, 45, 244,
+ 40, 245, 40, 245, 132, 246, 123, 132,
+ 123, 123, 59, 132, 45, 244, 139, 139,
+ 40, 287, 132, 194, 40, 287, 125, 227,
+ 61, 137, 40, 243, 59, 59, 59, 59,
+ 40, 40, 40, 245, 125, 239, 33, 276,
+ 33, 276, 40, 284, 132, 290, 40, 287,
+ 44, 59, 38, 151, 33, 276, 33, 202,
+ 33, 188, 33, 266, 33, 202, 33, 276,
+ 33, 276, 33, 202, 33, 202, 189, 274,
+ 189, 274, 186, 275, 142, 142, 33, 276,
+ 59, 59, 44, 59, 33, 276, 59, 59,
+ 40, 245, 42, 290, 42, 290, 42, 290,
+ 59, 59, 59, 59, 41, 41, 132, 289,
+ 41, 44, 33, 276, 186, 278, 189, 279,
+ 189, 279, 33, 276, 33, 276, 33, 276,
+ 33, 276, 33, 276, 33, 276, 33, 276,
+ 33, 276, 33, 276, 132, 288, 40, 270,
+ 40, 269, 40, 269, 40, 269, 40, 269,
+ 40, 269, 40, 269, 40, 207, 40, 207,
+ 40, 207, 40, 207, 203, 207, 203, 207,
+ 44, 271, 45, 276, 33, 276, 44, 251,
+ 132, 240, 40, 287, 59, 59, 40, 245,
+ 59, 59, 40, 245, 59, 59, 40, 245,
+ 41, 149, 186, 193, 189, 274, 189, 274,
+ 189, 193, 38, 151, 33, 276, 33, 276,
+ 33, 276, 33, 276, 132, 289, 132, 269,
+ 40, 243, 139, 139, 139, 139, 132, 269,
+ 132, 269, 44, 125, 139, 271, 61, 61,
+ 59, 59, 40, 269, 124, 124, 40, 287,
+ 40, 287, 40, 287, 132, 132, 189, 279,
+ 189, 279, 193, 193, 33, 188, 44, 44,
+ 41, 41, 41, 44, 40, 284, 44, 44,
+ 41, 44, 125, 125, 125, 271, 43, 270,
+ 40, 269, 125, 125, 41, 41, 41, 41,
+ 0, 0
+};
+
+unsigned int Parser_offsets[] = { /* Start of each state's row in Parser_indicies.
+ * Parser::parseLangEl() reads
+ *   Parser_indicies[Parser_offsets[s] + (type - Parser_keys[2*s])].
+ * Consecutive entries differ by the width of the state's key range, e.g.
+ * state 0 accepts 132..226 (95 types) and state 1 therefore starts at 95. */
+ 0, 95, 96, 192, 300, 409, 410, 411,
+ 611, 817, 1023, 1138, 1148, 1149, 1223, 1423,
+ 1424, 1672, 1735, 1983, 2086, 2163, 2367, 2368,
+ 2369, 2370, 2576, 2691, 2935, 3179, 3424, 3583,
+ 3831, 3847, 3961, 4205, 4375, 4531, 4765, 4935,
+ 5179, 5423, 5593, 5763, 5849, 5935, 6025, 6026,
+ 6270, 6271, 6287, 6531, 6532, 6738, 6987, 7236,
+ 7485, 7486, 7487, 7488, 7646, 7650, 7894, 7987,
+ 8078, 8169, 8413, 8657, 8901, 9145, 9389, 9633,
+ 9877, 10121, 10365, 10522, 10753, 10983, 11213, 11443,
+ 11673, 11903, 12133, 12301, 12469, 12637, 12805, 12810,
+ 12815, 13043, 13275, 13519, 13727, 13836, 14084, 14085,
+ 14291, 14292, 14498, 14499, 14705, 14814, 14822, 14908,
+ 14994, 14999, 15113, 15357, 15601, 15845, 16089, 16247,
+ 16385, 16589, 16590, 16591, 16729, 16867, 16949, 17082,
+ 17083, 17084, 17314, 17315, 17563, 17811, 18059, 18060,
+ 18151, 18242, 18243, 18399, 18400, 18401, 18405, 18650,
+ 18651, 18655, 18656, 18803, 19031, 19261, 19262, 19263,
+ 19264
+};
+
+unsigned short Parser_targs[] = { /* NOTE(review): not referenced in the visible
+ * part of this file. Presumably the target state of each transition,
+ * indexed by the value parseLangEl() reads from Parser_indicies into `pos`
+ * - confirm against the rest of parseLangEl(). */
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 15,
+ 15, 15, 15, 15, 16, 17, 18, 19,
+ 20, 21, 22, 23, 24, 25, 26, 27,
+ 28, 29, 30, 31, 32, 33, 34, 35,
+ 35, 36, 37, 38, 39, 40, 41, 42,
+ 43, 44, 45, 46, 47, 48, 49, 50,
+ 51, 52, 53, 54, 55, 56, 57, 58,
+ 59, 60, 61, 62, 63, 64, 65, 66,
+ 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82,
+ 83, 84, 85, 86, 87, 88, 89, 90,
+ 91, 92, 93, 94, 95, 96, 97, 98,
+ 99, 100, 101, 102, 103, 104, 105, 106,
+ 107, 108, 109, 110, 111, 112, 113, 114,
+ 115, 116, 117, 118, 119, 120, 121, 122,
+ 123, 124, 125, 126, 127, 128, 129, 130,
+ 131, 132, 133, 134, 135, 136, 137, 138,
+ 139, 140, 141, 142, 143, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144
+};
+
+unsigned int Parser_actInds[] = { /* Increasing offsets that coincide with the
+ * starts of the 0-terminated runs in Parser_actions (e.g. entry 15 is 30,
+ * where the run 170, 1, 0 begins).
+ * NOTE(review): the lookup itself is outside this excerpt; presumably
+ * indexed by the same transition index as Parser_targs - confirm. */
+ 0, 2, 4, 6, 8, 10, 12, 14,
+ 16, 18, 20, 22, 24, 26, 28, 30,
+ 33, 36, 39, 42, 45, 47, 49, 51,
+ 53, 55, 57, 59, 61, 63, 65, 67,
+ 69, 71, 73, 75, 77, 79, 81, 83,
+ 85, 88, 90, 92, 94, 96, 98, 100,
+ 102, 104, 106, 108, 110, 112, 114, 116,
+ 118, 120, 122, 124, 126, 128, 130, 132,
+ 134, 136, 138, 140, 142, 144, 146, 148,
+ 150, 152, 154, 156, 158, 160, 162, 164,
+ 166, 168, 170, 172, 174, 176, 178, 180,
+ 182, 184, 186, 188, 190, 192, 195, 197,
+ 199, 201, 203, 205, 207, 209, 211, 213,
+ 215, 217, 219, 221, 223, 225, 227, 229,
+ 231, 233, 235, 237, 239, 241, 243, 245,
+ 247, 249, 251, 253, 255, 257, 259, 261,
+ 263, 265, 267, 269, 271, 273, 275, 277,
+ 279, 281, 283, 285, 287, 289, 291, 293,
+ 295, 297, 299, 301, 303, 305, 307, 309,
+ 311, 313, 315, 317, 319, 321, 323, 325,
+ 327, 329, 331, 333, 335, 337, 339, 341,
+ 343, 345, 347, 349, 351, 353, 355, 357,
+ 359, 361, 363, 365, 367, 369, 371, 373,
+ 375, 377, 379, 381, 383, 385, 387, 389,
+ 391, 393, 395, 397, 399, 401, 403, 405,
+ 407, 409, 411, 413, 415, 417, 419, 421,
+ 423, 425, 427, 429, 431, 433, 435, 437,
+ 439, 441, 443, 445, 447, 449, 451, 453,
+ 455, 457, 459, 461, 463, 465, 467, 469,
+ 471, 473, 475, 477, 479, 481, 483, 485,
+ 487, 489, 491, 493, 495, 497, 499, 501,
+ 503, 505, 507, 509, 511, 513, 515, 517,
+ 519, 521, 523, 525, 527, 529, 531, 533,
+ 535, 537, 539, 541, 543, 545, 547, 549,
+ 551, 553, 555, 557, 559, 561, 563, 565,
+ 567, 569, 571, 573, 575, 577, 579, 581,
+ 583, 585, 587, 589, 591, 593, 595, 597,
+ 599, 601, 603, 605, 607, 609, 611, 613,
+ 615, 617, 619, 621, 623, 625, 627, 629,
+ 631, 633, 635, 637, 639, 641, 643, 645,
+ 647, 649, 651, 653, 655, 657, 659, 661,
+ 663, 665, 667, 669, 671, 673, 675, 677,
+ 679, 681, 683, 685, 687, 689, 691, 693,
+ 695, 697, 699, 701, 703, 705, 707, 709,
+ 711, 713, 715, 717, 719, 721, 723, 725,
+ 727, 729, 731, 733, 735, 737, 739, 741,
+ 743, 745, 747, 749, 751, 753, 755, 757,
+ 759, 761, 763, 765, 767, 769, 771, 773,
+ 775, 777, 779, 781, 783, 785, 787, 789,
+ 791, 793
+};
+
+unsigned int Parser_actions[] = { /* Flat list of 0-terminated runs of action
+ * codes; the offsets in Parser_actInds point at the first code of a run.
+ * NOTE(review): how an individual code is decoded is not visible in this
+ * excerpt - confirm against the rest of parseLangEl(). */
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 170, 1,
+ 0, 174, 1, 0, 178, 1, 0, 182,
+ 1, 0, 186, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 66, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 270, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 2, 0, 7, 0, 10, 0, 15,
+ 0, 18, 0, 59, 0, 62, 0, 63,
+ 0, 66, 0, 71, 0, 75, 0, 79,
+ 0, 83, 0, 87, 0, 91, 0, 95,
+ 0, 99, 0, 103, 0, 107, 0, 111,
+ 0, 115, 0, 118, 0, 122, 0, 127,
+ 0, 131, 0, 135, 0, 139, 0, 143,
+ 0, 147, 0, 151, 0, 155, 0, 158,
+ 0, 162, 0, 166, 0, 170, 0, 174,
+ 0, 178, 0, 182, 0, 186, 0, 191,
+ 0, 195, 0, 199, 0, 203, 0, 207,
+ 0, 211, 0, 215, 0, 218, 0, 223,
+ 0, 226, 0, 231, 0, 235, 0, 239,
+ 0, 243, 0, 247, 0, 251, 0, 255,
+ 0, 259, 0, 263, 0, 267, 0, 270,
+ 0, 274, 0, 278, 0, 282, 0, 286,
+ 0, 291, 0, 295, 0, 299, 0, 303,
+ 0, 307, 0, 311, 0, 315, 0, 319,
+ 0, 323, 0, 327, 0, 331, 0, 335,
+ 0, 339, 0, 343, 0, 347, 0, 351,
+ 0, 355, 0, 359, 0, 363, 0, 367,
+ 0, 371, 0, 375, 0, 379, 0, 383,
+ 0, 387, 0, 391, 0, 395, 0, 399,
+ 0, 403, 0, 407, 0, 411, 0, 415,
+ 0, 419, 0, 423, 0, 427, 0, 431,
+ 0, 435, 0, 439, 0, 443, 0, 447,
+ 0, 451, 0, 455, 0, 459, 0, 463,
+ 0, 467, 0, 471, 0, 475, 0, 479,
+ 0, 483, 0, 487, 0, 491, 0, 495,
+ 0, 499, 0, 503, 0, 507, 0, 511,
+ 0, 515, 0, 519, 0, 523, 0, 527,
+ 0, 531, 0, 535, 0, 539, 0, 543,
+ 0, 547, 0, 551, 0, 555, 0, 559,
+ 0, 563, 0, 567, 0, 570, 0, 571,
+ 0, 575, 0, 578, 0, 583, 0, 587,
+ 0, 591, 0, 595, 0, 598, 0, 603,
+ 0, 607, 0, 611, 0, 615, 0, 619,
+ 0, 623, 0, 627, 0, 631, 0, 635,
+ 0, 639, 0, 643, 0, 647, 0, 651,
+ 0, 654, 0, 658, 0, 662, 0, 663,
+ 0, 667, 0, 671, 0, 675, 0, 679,
+ 0, 683, 0, 686, 0, 687, 0, 690,
+ 0, 691, 0, 695, 0, 699, 0, 703,
+ 0, 707, 0, 710, 0, 715, 0, 718,
+ 0, 723, 0, 727, 0, 731, 0, 735,
+ 0, 739, 0, 742, 0, 746, 0, 751,
+ 0, 755, 0, 758, 0, 763, 0, 767,
+ 0, 771, 0, 775, 0, 779, 0, 783,
+ 0, 787, 0, 791, 0, 795, 0, 799,
+ 0, 803, 0, 807, 0, 811, 0, 815,
+ 0, 819, 0, 823, 0, 827, 0, 831,
+ 0, 835, 0, 839, 0, 843, 0, 846,
+ 0, 851, 0, 855, 0, 859, 0, 863,
+ 0, 867, 0, 871, 0, 875, 0, 879,
+ 0, 883, 0, 887, 0, 891, 0, 895,
+ 0, 899, 0, 903, 0, 907, 0, 911,
+ 0, 915, 0, 919, 0, 923, 0, 927,
+ 0, 930, 0, 934, 0, 938, 0, 943,
+ 0, 946, 0, 951, 0, 955, 0, 23,
+ 0, 27, 0, 31, 0, 35, 0, 39,
+ 0, 43, 0, 47, 0, 51, 0, 55,
+ 0, 1, 0
+};
+
+int Parser_commitLen[] = { /* All entries are 0 except the last ten (nine 1s
+ * followed by a 2).
+ * NOTE(review): not referenced in the visible part of this file; presumably
+ * a per-transition commit length parallel to Parser_targs - confirm. */
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2
+};
+
+unsigned int Parser_fssProdIdIndex[] = { /* Identity mapping 0..239: entry i
+ * holds i.
+ * NOTE(review): not referenced in the visible part of this file; presumably
+ * maps a production id to its slot in Parser_fssProdLengths - confirm. */
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 65, 66, 67, 68, 69, 70, 71,
+ 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83, 84, 85, 86, 87,
+ 88, 89, 90, 91, 92, 93, 94, 95,
+ 96, 97, 98, 99, 100, 101, 102, 103,
+ 104, 105, 106, 107, 108, 109, 110, 111,
+ 112, 113, 114, 115, 116, 117, 118, 119,
+ 120, 121, 122, 123, 124, 125, 126, 127,
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 161, 162, 163, 164, 165, 166, 167,
+ 168, 169, 170, 171, 172, 173, 174, 175,
+ 176, 177, 178, 179, 180, 181, 182, 183,
+ 184, 185, 186, 187, 188, 189, 190, 191,
+ 192, 193, 194, 195, 196, 197, 198, 199,
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 208, 209, 210, 211, 212, 213, 214, 215,
+ 216, 217, 218, 219, 220, 221, 222, 223,
+ 224, 225, 226, 227, 228, 229, 230, 231,
+ 232, 233, 234, 235, 236, 237, 238, 239
+};
+
+char Parser_fssProdLengths[] = { /* One small count per production, 240 entries
+ * in the same order as Parser_prodNames.
+ * NOTE(review): presumably the number of right-hand-side elements of each
+ * production (0 for an empty alternative) - the use is outside this
+ * excerpt, confirm. */
+ 1, 3, 0, 2, 0, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 4, 5, 5, 1, 5, 4, 3,
+ 4, 3, 3, 5, 2, 0, 1, 4,
+ 2, 1, 1, 1, 3, 2, 1, 0,
+ 3, 1, 3, 3, 3, 3, 1, 2,
+ 3, 3, 3, 3, 1, 3, 1, 3,
+ 1, 3, 3, 7, 3, 3, 3, 3,
+ 3, 3, 7, 1, 1, 1, 1, 1,
+ 1, 2, 1, 2, 1, 2, 1, 1,
+ 2, 1, 2, 1, 2, 1, 2, 1,
+ 2, 1, 2, 1, 2, 1, 2, 1,
+ 2, 1, 2, 1, 2, 1, 2, 1,
+ 2, 1, 2, 1, 2, 1, 2, 1,
+ 2, 1, 2, 1, 2, 1, 2, 1,
+ 2, 1, 2, 1, 2, 1, 2, 1,
+ 2, 1, 2, 1, 2, 1, 2, 1,
+ 2, 1, 2, 1, 3, 1, 1, 3,
+ 1, 1, 1, 2, 2, 1, 2, 2,
+ 2, 2, 4, 5, 5, 6, 1, 1,
+ 2, 2, 1, 1, 1, 1, 3, 3,
+ 3, 3, 3, 1, 1, 1, 2, 1,
+ 2, 0, 2, 1, 3, 3, 1, 1,
+ 2, 0, 1, 3, 2, 0, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 3, 3, 4, 3, 4, 3, 4,
+ 2, 2, 2, 0, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 4,
+ 2, 0, 2, 1, 0, 3, 1, 1
+};
+
+unsigned short Parser_prodLhsIds[] = { /* Left-hand-side element id of each
+ * production, 240 entries in the same order as Parser_prodNames. The ids
+ * line up with positions in Parser_lelNames: entry 0 is 226 ("start") for
+ * production "start-1", entries 5..13 are 228 ("statement").
+ * NOTE(review): the lookup that consumes this table is outside this
+ * excerpt. */
+ 226, 225, 225, 227, 227, 228, 228, 228,
+ 228, 228, 228, 228, 228, 228, 238, 239,
+ 239, 237, 229, 230, 240, 231, 232, 232,
+ 233, 234, 235, 236, 246, 246, 242, 242,
+ 247, 247, 248, 248, 248, 249, 249, 249,
+ 241, 241, 252, 252, 252, 252, 252, 253,
+ 253, 253, 253, 253, 253, 254, 254, 255,
+ 255, 257, 257, 257, 257, 257, 257, 257,
+ 257, 257, 257, 257, 258, 258, 258, 258,
+ 261, 261, 261, 261, 261, 261, 261, 262,
+ 262, 262, 262, 262, 262, 262, 262, 262,
+ 262, 262, 262, 263, 263, 263, 263, 263,
+ 263, 263, 263, 263, 263, 263, 263, 264,
+ 264, 264, 264, 264, 264, 264, 264, 264,
+ 264, 264, 264, 265, 265, 265, 265, 265,
+ 265, 265, 265, 265, 265, 265, 265, 266,
+ 266, 266, 266, 266, 266, 266, 266, 266,
+ 266, 266, 266, 250, 250, 250, 269, 251,
+ 260, 259, 270, 270, 270, 267, 268, 268,
+ 268, 268, 268, 268, 268, 268, 268, 271,
+ 272, 272, 272, 273, 273, 273, 273, 273,
+ 273, 273, 273, 276, 276, 244, 244, 244,
+ 275, 275, 277, 277, 278, 278, 278, 278,
+ 274, 274, 279, 279, 243, 243, 280, 280,
+ 280, 283, 283, 283, 283, 283, 283, 281,
+ 281, 281, 281, 281, 281, 281, 281, 281,
+ 281, 281, 245, 245, 286, 286, 286, 282,
+ 282, 282, 282, 282, 282, 282, 287, 287,
+ 287, 287, 287, 284, 284, 284, 284, 284,
+ 256, 288, 285, 290, 290, 289, 289, 291
+};
+
+const char *Parser_prodNames[] = { /* Printable name of each of the 240
+ * productions, written as "<nonterminal>-<alternative number>". The order
+ * matches Parser_prodLhsIds and Parser_fssProdLengths.
+ * NOTE(review): not referenced in the visible part of this file;
+ * presumably used only for diagnostics/logging - confirm. */
+ "start-1",
+ "section_list-1",
+ "section_list-2",
+ "statement_list-1",
+ "statement_list-2",
+ "statement-1",
+ "statement-2",
+ "statement-3",
+ "statement-4",
+ "statement-5",
+ "statement-6",
+ "statement-7",
+ "statement-8",
+ "statement-9",
+ "export_open-1",
+ "opt_export-1",
+ "opt_export-2",
+ "export_block-1",
+ "assignment-1",
+ "instantiation-1",
+ "machine_name-1",
+ "action_spec-1",
+ "alphtype_spec-1",
+ "alphtype_spec-2",
+ "range_spec-1",
+ "getkey_spec-1",
+ "access_spec-1",
+ "variable_spec-1",
+ "opt_whitespace-1",
+ "opt_whitespace-2",
+ "join_or_lm-1",
+ "join_or_lm-2",
+ "lm_part_list-1",
+ "lm_part_list-2",
+ "longest_match_part-1",
+ "longest_match_part-2",
+ "longest_match_part-3",
+ "opt_lm_part_action-1",
+ "opt_lm_part_action-2",
+ "opt_lm_part_action-3",
+ "join-1",
+ "join-2",
+ "expression-1",
+ "expression-2",
+ "expression-3",
+ "expression-4",
+ "expression-5",
+ "term-1",
+ "term-2",
+ "term-3",
+ "term-4",
+ "term-5",
+ "term-6",
+ "factor_with_label-1",
+ "factor_with_label-2",
+ "factor_with_ep-1",
+ "factor_with_ep-2",
+ "factor_with_aug-1",
+ "factor_with_aug-2",
+ "factor_with_aug-3",
+ "factor_with_aug-4",
+ "factor_with_aug-5",
+ "factor_with_aug-6",
+ "factor_with_aug-7",
+ "factor_with_aug-8",
+ "factor_with_aug-9",
+ "factor_with_aug-10",
+ "factor_with_aug-11",
+ "aug_type_base-1",
+ "aug_type_base-2",
+ "aug_type_base-3",
+ "aug_type_base-4",
+ "aug_type_cond-1",
+ "aug_type_cond-2",
+ "aug_type_cond-3",
+ "aug_type_cond-4",
+ "aug_type_cond-5",
+ "aug_type_cond-6",
+ "aug_type_cond-7",
+ "aug_type_to_state-1",
+ "aug_type_to_state-2",
+ "aug_type_to_state-3",
+ "aug_type_to_state-4",
+ "aug_type_to_state-5",
+ "aug_type_to_state-6",
+ "aug_type_to_state-7",
+ "aug_type_to_state-8",
+ "aug_type_to_state-9",
+ "aug_type_to_state-10",
+ "aug_type_to_state-11",
+ "aug_type_to_state-12",
+ "aug_type_from_state-1",
+ "aug_type_from_state-2",
+ "aug_type_from_state-3",
+ "aug_type_from_state-4",
+ "aug_type_from_state-5",
+ "aug_type_from_state-6",
+ "aug_type_from_state-7",
+ "aug_type_from_state-8",
+ "aug_type_from_state-9",
+ "aug_type_from_state-10",
+ "aug_type_from_state-11",
+ "aug_type_from_state-12",
+ "aug_type_eof-1",
+ "aug_type_eof-2",
+ "aug_type_eof-3",
+ "aug_type_eof-4",
+ "aug_type_eof-5",
+ "aug_type_eof-6",
+ "aug_type_eof-7",
+ "aug_type_eof-8",
+ "aug_type_eof-9",
+ "aug_type_eof-10",
+ "aug_type_eof-11",
+ "aug_type_eof-12",
+ "aug_type_gbl_error-1",
+ "aug_type_gbl_error-2",
+ "aug_type_gbl_error-3",
+ "aug_type_gbl_error-4",
+ "aug_type_gbl_error-5",
+ "aug_type_gbl_error-6",
+ "aug_type_gbl_error-7",
+ "aug_type_gbl_error-8",
+ "aug_type_gbl_error-9",
+ "aug_type_gbl_error-10",
+ "aug_type_gbl_error-11",
+ "aug_type_gbl_error-12",
+ "aug_type_local_error-1",
+ "aug_type_local_error-2",
+ "aug_type_local_error-3",
+ "aug_type_local_error-4",
+ "aug_type_local_error-5",
+ "aug_type_local_error-6",
+ "aug_type_local_error-7",
+ "aug_type_local_error-8",
+ "aug_type_local_error-9",
+ "aug_type_local_error-10",
+ "aug_type_local_error-11",
+ "aug_type_local_error-12",
+ "action_embed-1",
+ "action_embed-2",
+ "action_embed-3",
+ "action_embed_word-1",
+ "action_embed_block-1",
+ "priority_name-1",
+ "priority_aug-1",
+ "priority_aug_num-1",
+ "priority_aug_num-2",
+ "priority_aug_num-3",
+ "local_err_name-1",
+ "factor_with_rep-1",
+ "factor_with_rep-2",
+ "factor_with_rep-3",
+ "factor_with_rep-4",
+ "factor_with_rep-5",
+ "factor_with_rep-6",
+ "factor_with_rep-7",
+ "factor_with_rep-8",
+ "factor_with_rep-9",
+ "factor_rep_num-1",
+ "factor_with_neg-1",
+ "factor_with_neg-2",
+ "factor_with_neg-3",
+ "factor-1",
+ "factor-2",
+ "factor-3",
+ "factor-4",
+ "factor-5",
+ "factor-6",
+ "factor-7",
+ "factor-8",
+ "range_lit-1",
+ "range_lit-2",
+ "alphabet_num-1",
+ "alphabet_num-2",
+ "alphabet_num-3",
+ "regular_expr-1",
+ "regular_expr-2",
+ "regular_expr_item-1",
+ "regular_expr_item-2",
+ "regular_expr_char-1",
+ "regular_expr_char-2",
+ "regular_expr_char-3",
+ "regular_expr_char-4",
+ "regular_expr_or_data-1",
+ "regular_expr_or_data-2",
+ "regular_expr_or_char-1",
+ "regular_expr_or_char-2",
+ "inline_block-1",
+ "inline_block-2",
+ "inline_block_item-1",
+ "inline_block_item-2",
+ "inline_block_item-3",
+ "inline_block_symbol-1",
+ "inline_block_symbol-2",
+ "inline_block_symbol-3",
+ "inline_block_symbol-4",
+ "inline_block_symbol-5",
+ "inline_block_symbol-6",
+ "inline_block_interpret-1",
+ "inline_block_interpret-2",
+ "inline_block_interpret-3",
+ "inline_block_interpret-4",
+ "inline_block_interpret-5",
+ "inline_block_interpret-6",
+ "inline_block_interpret-7",
+ "inline_block_interpret-8",
+ "inline_block_interpret-9",
+ "inline_block_interpret-10",
+ "inline_block_interpret-11",
+ "inline_expr-1",
+ "inline_expr-2",
+ "inline_expr_item-1",
+ "inline_expr_item-2",
+ "inline_expr_item-3",
+ "inline_expr_any-1",
+ "inline_expr_any-2",
+ "inline_expr_any-3",
+ "inline_expr_any-4",
+ "inline_expr_any-5",
+ "inline_expr_any-6",
+ "inline_expr_any-7",
+ "inline_expr_symbol-1",
+ "inline_expr_symbol-2",
+ "inline_expr_symbol-3",
+ "inline_expr_symbol-4",
+ "inline_expr_symbol-5",
+ "inline_expr_interpret-1",
+ "inline_expr_interpret-2",
+ "inline_expr_interpret-3",
+ "inline_expr_interpret-4",
+ "inline_expr_interpret-5",
+ "local_state_ref-1",
+ "no_name_sep-1",
+ "state_ref-1",
+ "opt_name_sep-1",
+ "opt_name_sep-2",
+ "state_ref_names-1",
+ "state_ref_names-2",
+ "_start-1"
+};
+
+const char *Parser_lelNames[] = { /* Printable name of every language element,
+ * one entry per type id 0..291. Positions 33..126 hold the printable ASCII
+ * characters at their own code points, the other positions below 128 are
+ * "D-<n>" placeholders, 128..223 are the KW_, TK_, RE_ and IL_ token names,
+ * 224 is "_eof" and 225..291 are the nonterminals (the ids that
+ * Parser_prodLhsIds refers to).
+ * NOTE(review): not referenced in the visible part of this file;
+ * presumably used only for diagnostics/logging - confirm. */
+ "D-0",
+ "D-1",
+ "D-2",
+ "D-3",
+ "D-4",
+ "D-5",
+ "D-6",
+ "D-7",
+ "D-8",
+ "D-9",
+ "D-10",
+ "D-11",
+ "D-12",
+ "D-13",
+ "D-14",
+ "D-15",
+ "D-16",
+ "D-17",
+ "D-18",
+ "D-19",
+ "D-20",
+ "D-21",
+ "D-22",
+ "D-23",
+ "D-24",
+ "D-25",
+ "D-26",
+ "D-27",
+ "D-28",
+ "D-29",
+ "D-30",
+ "D-31",
+ "D-32",
+ "!",
+ "\"",
+ "#",
+ "$",
+ "%",
+ "&",
+ "'",
+ "(",
+ ")",
+ "*",
+ "+",
+ ",",
+ "-",
+ ".",
+ "/",
+ "0",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ "6",
+ "7",
+ "8",
+ "9",
+ ":",
+ ";",
+ "<",
+ "=",
+ ">",
+ "?",
+ "@",
+ "A",
+ "B",
+ "C",
+ "D",
+ "E",
+ "F",
+ "G",
+ "H",
+ "I",
+ "J",
+ "K",
+ "L",
+ "M",
+ "N",
+ "O",
+ "P",
+ "Q",
+ "R",
+ "S",
+ "T",
+ "U",
+ "V",
+ "W",
+ "X",
+ "Y",
+ "Z",
+ "[",
+ "\\",
+ "]",
+ "^",
+ "_",
+ "`",
+ "a",
+ "b",
+ "c",
+ "d",
+ "e",
+ "f",
+ "g",
+ "h",
+ "i",
+ "j",
+ "k",
+ "l",
+ "m",
+ "n",
+ "o",
+ "p",
+ "q",
+ "r",
+ "s",
+ "t",
+ "u",
+ "v",
+ "w",
+ "x",
+ "y",
+ "z",
+ "{",
+ "|",
+ "}",
+ "~",
+ "D-127",
+ "KW_Machine",
+ "KW_Include",
+ "KW_Import",
+ "KW_Write",
+ "TK_Word",
+ "TK_Literal",
+ "TK_Number",
+ "TK_Inline",
+ "TK_Reference",
+ "TK_ColonEquals",
+ "TK_EndSection",
+ "TK_UInt",
+ "TK_Hex",
+ "TK_BaseClause",
+ "TK_DotDot",
+ "TK_ColonGt",
+ "TK_ColonGtGt",
+ "TK_LtColon",
+ "TK_Arrow",
+ "TK_DoubleArrow",
+ "TK_StarStar",
+ "TK_NameSep",
+ "TK_BarStar",
+ "TK_DashDash",
+ "TK_StartCond",
+ "TK_AllCond",
+ "TK_LeavingCond",
+ "TK_Middle",
+ "TK_StartGblError",
+ "TK_AllGblError",
+ "TK_FinalGblError",
+ "TK_NotFinalGblError",
+ "TK_NotStartGblError",
+ "TK_MiddleGblError",
+ "TK_StartLocalError",
+ "TK_AllLocalError",
+ "TK_FinalLocalError",
+ "TK_NotFinalLocalError",
+ "TK_NotStartLocalError",
+ "TK_MiddleLocalError",
+ "TK_StartEOF",
+ "TK_AllEOF",
+ "TK_FinalEOF",
+ "TK_NotFinalEOF",
+ "TK_NotStartEOF",
+ "TK_MiddleEOF",
+ "TK_StartToState",
+ "TK_AllToState",
+ "TK_FinalToState",
+ "TK_NotFinalToState",
+ "TK_NotStartToState",
+ "TK_MiddleToState",
+ "TK_StartFromState",
+ "TK_AllFromState",
+ "TK_FinalFromState",
+ "TK_NotFinalFromState",
+ "TK_NotStartFromState",
+ "TK_MiddleFromState",
+ "RE_Slash",
+ "RE_SqOpen",
+ "RE_SqOpenNeg",
+ "RE_SqClose",
+ "RE_Dot",
+ "RE_Star",
+ "RE_Dash",
+ "RE_Char",
+ "IL_WhiteSpace",
+ "IL_Comment",
+ "IL_Literal",
+ "IL_Symbol",
+ "KW_Action",
+ "KW_AlphType",
+ "KW_Range",
+ "KW_GetKey",
+ "KW_When",
+ "KW_Eof",
+ "KW_Err",
+ "KW_Lerr",
+ "KW_To",
+ "KW_From",
+ "KW_Export",
+ "KW_Break",
+ "KW_Exec",
+ "KW_Hold",
+ "KW_PChar",
+ "KW_Char",
+ "KW_Goto",
+ "KW_Call",
+ "KW_Ret",
+ "KW_CurState",
+ "KW_TargState",
+ "KW_Entry",
+ "KW_Next",
+ "KW_Variable",
+ "KW_Access",
+ "TK_Semi",
+ "_eof",
+ "section_list",
+ "start",
+ "statement_list",
+ "statement",
+ "assignment",
+ "instantiation",
+ "action_spec",
+ "alphtype_spec",
+ "range_spec",
+ "getkey_spec",
+ "access_spec",
+ "variable_spec",
+ "export_block",
+ "export_open",
+ "opt_export",
+ "machine_name",
+ "join",
+ "join_or_lm",
+ "inline_block",
+ "alphabet_num",
+ "inline_expr",
+ "opt_whitespace",
+ "lm_part_list",
+ "longest_match_part",
+ "opt_lm_part_action",
+ "action_embed",
+ "action_embed_block",
+ "expression",
+ "term",
+ "factor_with_label",
+ "factor_with_ep",
+ "local_state_ref",
+ "factor_with_aug",
+ "aug_type_base",
+ "priority_aug",
+ "priority_name",
+ "aug_type_cond",
+ "aug_type_to_state",
+ "aug_type_from_state",
+ "aug_type_eof",
+ "aug_type_gbl_error",
+ "aug_type_local_error",
+ "local_err_name",
+ "factor_with_rep",
+ "action_embed_word",
+ "priority_aug_num",
+ "factor_rep_num",
+ "factor_with_neg",
+ "factor",
+ "regular_expr_or_data",
+ "regular_expr",
+ "range_lit",
+ "regular_expr_item",
+ "regular_expr_char",
+ "regular_expr_or_char",
+ "inline_block_item",
+ "inline_block_interpret",
+ "inline_expr_any",
+ "inline_block_symbol",
+ "inline_expr_interpret",
+ "state_ref",
+ "inline_expr_item",
+ "inline_expr_symbol",
+ "no_name_sep",
+ "state_ref_names",
+ "opt_name_sep",
+ "_start"
+};
+
+#line 1375 "rlparse.kl"
+
+
+void Parser::init()
+{
+ /* Reset the parser to its starting configuration: allocate the first
+  * block of 8128 language elements, use slot 0 of that block as the
+  * initial element of the parse stack, and clear the free pool and the
+  * retry/node/error counters. This comment sits ahead of the #line
+  * directive below so that it does not disturb the line mapping. */
+ #line 3769 "rlparse.cpp"
+ curs = Parser_startState;
+ pool = 0;
+ freshEl = (struct Parser_LangEl*) malloc( sizeof(struct Parser_LangEl)*8128);
+ if ( freshEl == 0 ) {
+     /* Allocation failed. The writes through stackTop below would
+      * dereference a null pointer, so stop deliberately instead. */
+     #ifdef LOG_ACTIONS
+     cerr << "failed to allocate 8128 LangEls" << endl;
+     #endif
+     abort();
+ }
+ #ifdef LOG_ACTIONS
+ cerr << "allocating 8128 LangEls" << endl;
+ #endif
+ /* Slot 0 becomes the initial stack element: type 0, state -1, and no
+  * sibling or child links. */
+ stackTop = freshEl;
+ stackTop->type = 0;
+ stackTop->state = -1;
+ stackTop->next = 0;
+ stackTop->child = 0;
+ /* malloc does not zero memory and the commit walk in parseLangEl reads
+  * retry on stack elements, so give this element a defined value too. */
+ stackTop->retry = 0;
+ /* Slot 0 is taken, so the next fresh element comes from slot 1. */
+ freshPos = 1;
+ lastFinal = stackTop;
+ numRetry = 0;
+ numNodes = 0;
+ errCount = 0;
+#line 1380 "rlparse.kl"
+}
+
+int Parser::parseLangEl( int type, const Token *token )
+{
+ #line 3791 "rlparse.cpp"
+#define reject() induceReject = 1
+
+ int pos, targState;
+ unsigned int *action;
+ int rhsLen;
+ struct Parser_LangEl *rhs[32];
+ struct Parser_LangEl *lel;
+ struct Parser_LangEl *input;
+ char induceReject;
+
+ if ( curs < 0 )
+ return 0;
+
+ if ( pool == 0 ) {
+ if ( freshPos == 8128 ) {
+ freshEl = (struct Parser_LangEl*) malloc(
+ sizeof(struct Parser_LangEl)*8128);
+ #ifdef LOG_ACTIONS
+ cerr << "allocating 8128 LangEls" << endl;
+ #endif
+ freshPos = 0;
+ }
+ input = freshEl + freshPos++;
+ }
+ else {
+ input = pool;
+ pool = pool->next;
+ }
+ numNodes += 1;
+ input->type = type;
+ input->user.token = *token;
+ input->next = 0;
+ input->retry = 0;
+ input->child = 0;
+
+again:
+ if ( input == 0 )
+ goto _out;
+
+ lel = input;
+ if ( lel->type < Parser_keys[curs<<1] || lel->type > Parser_keys[(curs<<1)+1] )
+ goto parseError;
+
+ pos = Parser_indicies[Parser_offsets[curs] + (lel->type - Parser_keys[curs<<1])];
+ if ( pos < 0 )
+ goto parseError;
+
+ induceReject = 0;
+ targState = Parser_targs[pos];
+ action = Parser_actions + Parser_actInds[pos];
+ if ( lel->retry & 0x0000ffff )
+ action += (lel->retry & 0x0000ffff);
+
+ if ( *action & 0x1 ) {
+ #ifdef LOG_ACTIONS
+ cerr << "shifted: " << Parser_lelNames[lel->type];
+ #endif
+ input = input->next;
+ lel->state = curs;
+ lel->next = stackTop;
+ stackTop = lel;
+
+ if ( action[1] == 0 )
+ lel->retry &= 0xffff0000;
+ else {
+ lel->retry += 1;
+ numRetry += 1;
+ #ifdef LOG_ACTIONS
+ cerr << " retry: " << stackTop;
+ #endif
+ }
+ #ifdef LOG_ACTIONS
+ cerr << endl;
+ #endif
+ }
+
+ if ( Parser_commitLen[pos] != 0 ) {
+ struct Parser_LangEl *commitHead = stackTop;
+ int absCommitLen = Parser_commitLen[pos];
+
+ #ifdef LOG_ACTIONS
+ cerr << "running commit of length: " << Parser_commitLen[pos] << endl;
+ #endif
+
+ if ( absCommitLen < 0 ) {
+ commitHead = commitHead->next;
+ absCommitLen = -1 * absCommitLen;
+ }
+ {
+ struct Parser_LangEl *lel = commitHead;
+ struct Parser_LangEl **cmStack = (struct Parser_LangEl**) malloc( sizeof(struct Parser_LangEl) * numNodes);
+ int n = absCommitLen, depth = 0, sp = 0;
+
+commit_head:
+ if ( lel->retry > 0 ) {
+ if ( lel->retry & 0x0000ffff )
+ numRetry -= 1;
+ if ( lel->retry & 0xffff0000 )
+ numRetry -= 1;
+ lel->retry = 0;
+ }
+
+ /* If depth is > 0 then move over lel freely, otherwise, make
+ * sure that we have not already done n steps down the line. */
+ if ( lel->next != 0 && ( depth > 0 || n > 1 ) ) {
+ cmStack[sp++] = lel;
+ lel = lel->next;
+
+ /* If we are at the top level count the steps down the line. */
+ if ( depth == 0 )
+ n -= 1;
+ goto commit_head;
+ }
+
+commit_reverse:
+ if ( lel->child != 0 ) {
+ cmStack[sp++] = lel;
+ lel = lel->child;
+
+ /* When we move down we need to increment the depth. */
+ depth += 1;
+ goto commit_head;
+ }
+
+commit_upwards:
+ if ( sp > 0 ) {
+ /* Figure out which place to return to. */
+ if ( cmStack[sp-1]->next == lel ) {
+ lel = cmStack[--sp];
+ goto commit_reverse;
+ }
+ else {
+ /* Going back up, adjust the depth. */
+ lel = cmStack[--sp];
+ depth -= 1;
+ goto commit_upwards;
+ }
+ }
+ free( cmStack );
+ }
+ if ( numRetry == 0 ) {
+ #ifdef LOG_ACTIONS
+ cerr << "number of retries is zero, "
+ "executing final actions" << endl;
+ #endif
+ {
+ struct Parser_LangEl *lel = commitHead;
+ struct Parser_LangEl **cmStack = (struct Parser_LangEl**) malloc( sizeof( struct Parser_LangEl) * numNodes);
+ int sp = 0;
+ char doExec = 0;
+
+final_head:
+ if ( lel == lastFinal ) {
+ doExec = 1;
+ goto hit_final;
+ }
+
+ if ( lel->next != 0 ) {
+ cmStack[sp++] = lel;
+ lel = lel->next;
+ goto final_head;
+ }
+
+final_reverse:
+
+ if ( lel->child != 0 ) {
+ cmStack[sp++] = lel;
+ lel = lel->child;
+ goto final_head;
+ }
+
+final_upwards:
+
+ if ( doExec ) {
+{
+ if ( lel->type < 225 ) {
+ }
+ else {
+ struct Parser_LangEl *redLel = lel;
+ if ( redLel->child != 0 ) {
+ int r = Parser_fssProdLengths[redLel->reduction] - 1;
+ struct Parser_LangEl *rhsEl = redLel->child;
+ while ( rhsEl != 0 ) {
+ rhs[r--] = rhsEl;
+ rhsEl = rhsEl->next;
+ }
+ }
+switch ( lel->reduction ) {
+case 14: {
+#line 59 "rlparse.kl"
+
+ exportContext.append( true );
+
+
+#line 3985 "rlparse.cpp"
+} break;
+case 15: {
+#line 68 "rlparse.kl"
+ (&redLel->user.opt_export)->isSet = true;
+
+#line 3991 "rlparse.cpp"
+} break;
+case 16: {
+#line 69 "rlparse.kl"
+ (&redLel->user.opt_export)->isSet = false;
+
+#line 3997 "rlparse.cpp"
+} break;
+case 17: {
+#line 72 "rlparse.kl"
+
+ exportContext.remove( exportContext.length()-1 );
+
+
+#line 4005 "rlparse.cpp"
+} break;
+case 18: {
+#line 77 "rlparse.kl"
+
+ /* Main machine must be an instance. */
+ bool isInstance = false;
+ if ( strcmp((&rhs[1]->user.token_type)->token.data, mainMachine) == 0 ) {
+ warning((&rhs[1]->user.token_type)->token.loc) <<
+ "main machine will be implicitly instantiated" << endl;
+ isInstance = true;
+ }
+
+ /* Generic creation of machine for instantiation and assignment. */
+ JoinOrLm *joinOrLm = new JoinOrLm( (&rhs[3]->user.join)->join );
+ tryMachineDef( (&rhs[1]->user.token_type)->token.loc, (&rhs[1]->user.token_type)->token.data, joinOrLm, isInstance );
+
+ if ( (&rhs[0]->user.opt_export)->isSet )
+ exportContext.remove( exportContext.length()-1 );
+
+
+#line 4026 "rlparse.cpp"
+} break;
+case 19: {
+#line 95 "rlparse.kl"
+
+ /* Generic creation of machine for instantiation and assignment. */
+ tryMachineDef( (&rhs[1]->user.token_type)->token.loc, (&rhs[1]->user.token_type)->token.data, (&rhs[3]->user.join_or_lm)->joinOrLm, true );
+
+ if ( (&rhs[0]->user.opt_export)->isSet )
+ exportContext.remove( exportContext.length()-1 );
+
+
+#line 4038 "rlparse.cpp"
+} break;
+case 20: {
+#line 111 "rlparse.kl"
+
+ /* Make/get the priority key. The name may have already been referenced
+ * and therefore exist. */
+ PriorDictEl *priorDictEl;
+ if ( pd->priorDict.insert( (&rhs[0]->user.token)->data, pd->nextPriorKey, &priorDictEl ) )
+ pd->nextPriorKey += 1;
+ pd->curDefPriorKey = priorDictEl->value;
+
+ /* Make/get the local error key. */
+ LocalErrDictEl *localErrDictEl;
+ if ( pd->localErrDict.insert( (&rhs[0]->user.token)->data, pd->nextLocalErrKey, &localErrDictEl ) )
+ pd->nextLocalErrKey += 1;
+ pd->curDefLocalErrKey = localErrDictEl->value;
+
+ (&redLel->user.token_type)->token = *(&rhs[0]->user.token);
+
+
+#line 4059 "rlparse.cpp"
+} break;
+case 21: {
+#line 129 "rlparse.kl"
+
+ if ( pd->actionDict.find( (&rhs[1]->user.token)->data ) ) {
+ /* Recover by just ignoring the duplicate. */
+ error((&rhs[1]->user.token)->loc) << "action \"" << (&rhs[1]->user.token)->data << "\" already defined" << endl;
+ }
+ else {
+ //cerr << "NEW ACTION " << $2->data << " " << $4->inlineList << endl;
+ /* Add the action to the list of actions. */
+ Action *newAction = new Action( (&rhs[2]->user.token)->loc, (&rhs[1]->user.token)->data,
+ (&rhs[3]->user.inline_list)->inlineList, pd->nextCondId++ );
+
+ /* Insert to list and dict. */
+ pd->actionList.append( newAction );
+ pd->actionDict.insert( newAction );
+ }
+
+
+#line 4080 "rlparse.cpp"
+} break;
+case 22: {
+#line 149 "rlparse.kl"
+
+ if ( ! pd->setAlphType( (&rhs[1]->user.token)->data, (&rhs[2]->user.token)->data ) ) {
+ // Recover by ignoring the alphtype statement.
+ error((&rhs[1]->user.token)->loc) << "\"" << (&rhs[1]->user.token)->data <<
+ " " << (&rhs[2]->user.token)->data << "\" is not a valid alphabet type" << endl;
+ }
+
+
+#line 4092 "rlparse.cpp"
+} break;
+case 23: {
+#line 158 "rlparse.kl"
+
+ if ( ! pd->setAlphType( (&rhs[1]->user.token)->data ) ) {
+ // Recover by ignoring the alphtype statement.
+ error((&rhs[1]->user.token)->loc) << "\"" << (&rhs[1]->user.token)->data <<
+ "\" is not a valid alphabet type" << endl;
+ }
+
+
+#line 4104 "rlparse.cpp"
+} break;
+case 24: {
+#line 168 "rlparse.kl"
+
+ // Save the upper and lower ends of the range and emit the line number.
+ pd->lowerNum = (&rhs[1]->user.token_type)->token.data;
+ pd->upperNum = (&rhs[2]->user.token_type)->token.data;
+ pd->rangeLowLoc = (&rhs[1]->user.token_type)->token.loc;
+ pd->rangeHighLoc = (&rhs[2]->user.token_type)->token.loc;
+
+
+#line 4116 "rlparse.cpp"
+} break;
+case 25: {
+#line 177 "rlparse.kl"
+
+ pd->getKeyExpr = (&rhs[1]->user.inline_list)->inlineList;
+
+
+#line 4124 "rlparse.cpp"
+} break;
+case 26: {
+#line 182 "rlparse.kl"
+
+ pd->accessExpr = (&rhs[1]->user.inline_list)->inlineList;
+
+
+#line 4132 "rlparse.cpp"
+} break;
+case 27: {
+#line 187 "rlparse.kl"
+
+ /* FIXME: Need to implement the rest of this. */
+ if ( strcmp( (&rhs[2]->user.token)->data, "curstate" ) == 0 )
+ pd->curStateExpr = (&rhs[3]->user.inline_list)->inlineList;
+ else {
+ error((&rhs[2]->user.token)->loc) << "sorry, unimplementd" << endl;
+ }
+
+
+#line 4145 "rlparse.cpp"
+} break;
+case 30: {
+#line 209 "rlparse.kl"
+
+ (&redLel->user.join_or_lm)->joinOrLm = new JoinOrLm( (&rhs[0]->user.join)->join );
+
+
+#line 4153 "rlparse.cpp"
+} break;
+case 31: {
+#line 213 "rlparse.kl"
+
+ /* Create a new factor going to a longest match structure. Record
+ * in the parse data that we have a longest match. */
+ LongestMatch *lm = new LongestMatch( (&rhs[0]->user.token)->loc, (&rhs[1]->user.lm_part_list)->lmPartList );
+ pd->lmList.append( lm );
+ for ( LmPartList::Iter lmp = *((&rhs[1]->user.lm_part_list)->lmPartList); lmp.lte(); lmp++ )
+ lmp->longestMatch = lm;
+ (&redLel->user.join_or_lm)->joinOrLm = new JoinOrLm( lm );
+
+
+#line 4167 "rlparse.cpp"
+} break;
+case 32: {
+#line 229 "rlparse.kl"
+
+ if ( (&rhs[1]->user.longest_match_part)->lmPart != 0 )
+ (&rhs[0]->user.lm_part_list)->lmPartList->append( (&rhs[1]->user.longest_match_part)->lmPart );
+ (&redLel->user.lm_part_list)->lmPartList = (&rhs[0]->user.lm_part_list)->lmPartList;
+
+
+#line 4177 "rlparse.cpp"
+} break;
+case 33: {
+#line 235 "rlparse.kl"
+
+ /* Create a new list with the part. */
+ (&redLel->user.lm_part_list)->lmPartList = new LmPartList;
+ if ( (&rhs[0]->user.longest_match_part)->lmPart != 0 )
+ (&redLel->user.lm_part_list)->lmPartList->append( (&rhs[0]->user.longest_match_part)->lmPart );
+
+
+#line 4188 "rlparse.cpp"
+} break;
+case 34: {
+#line 248 "rlparse.kl"
+ (&redLel->user.longest_match_part)->lmPart = 0;
+
+#line 4194 "rlparse.cpp"
+} break;
+case 35: {
+#line 250 "rlparse.kl"
+ (&redLel->user.longest_match_part)->lmPart = 0;
+
+#line 4200 "rlparse.cpp"
+} break;
+case 36: {
+#line 252 "rlparse.kl"
+
+ (&redLel->user.longest_match_part)->lmPart = 0;
+ Action *action = (&rhs[1]->user.opt_lm_part_action)->action;
+ if ( action != 0 )
+ action->isLmAction = true;
+ (&redLel->user.longest_match_part)->lmPart = new LongestMatchPart( (&rhs[0]->user.join)->join, action,
+ (&rhs[2]->user.token)->loc, pd->nextLongestMatchId++ );
+
+
+#line 4213 "rlparse.cpp"
+} break;
+case 37: {
+#line 267 "rlparse.kl"
+
+ (&redLel->user.opt_lm_part_action)->action = (&rhs[1]->user.action_ref)->action;
+
+
+#line 4221 "rlparse.cpp"
+} break;
+case 38: {
+#line 271 "rlparse.kl"
+
+ (&redLel->user.opt_lm_part_action)->action = (&rhs[0]->user.action_ref)->action;
+
+
+#line 4229 "rlparse.cpp"
+} break;
+case 39: {
+#line 275 "rlparse.kl"
+
+ (&redLel->user.opt_lm_part_action)->action = 0;
+
+
+#line 4237 "rlparse.cpp"
+} break;
+case 40: {
+#line 286 "rlparse.kl"
+
+ /* Append the expression to the list and return it. */
+ (&rhs[0]->user.join)->join->exprList.append( (&rhs[2]->user.expression)->expression );
+ (&redLel->user.join)->join = (&rhs[0]->user.join)->join;
+
+
+#line 4247 "rlparse.cpp"
+} break;
+case 41: {
+#line 292 "rlparse.kl"
+
+ (&redLel->user.join)->join = new Join( (&rhs[0]->user.expression)->expression );
+
+
+#line 4255 "rlparse.cpp"
+} break;
+case 42: {
+#line 302 "rlparse.kl"
+
+ (&redLel->user.expression)->expression = new Expression( (&rhs[0]->user.expression)->expression,
+ (&rhs[2]->user.term)->term, Expression::OrType );
+
+
+#line 4264 "rlparse.cpp"
+} break;
+case 43: {
+#line 307 "rlparse.kl"
+
+ (&redLel->user.expression)->expression = new Expression( (&rhs[0]->user.expression)->expression,
+ (&rhs[2]->user.term)->term, Expression::IntersectType );
+
+
+#line 4273 "rlparse.cpp"
+} break;
+case 44: {
+#line 314 "rlparse.kl"
+
+ (&redLel->user.expression)->expression = new Expression( (&rhs[0]->user.expression)->expression,
+ (&rhs[2]->user.term)->term, Expression::SubtractType );
+
+
+#line 4282 "rlparse.cpp"
+} break;
+case 45: {
+#line 319 "rlparse.kl"
+
+ (&redLel->user.expression)->expression = new Expression( (&rhs[0]->user.expression)->expression,
+ (&rhs[2]->user.term)->term, Expression::StrongSubtractType );
+
+
+#line 4291 "rlparse.cpp"
+} break;
+case 46: {
+#line 324 "rlparse.kl"
+
+ (&redLel->user.expression)->expression = new Expression( (&rhs[0]->user.term)->term );
+
+
+#line 4299 "rlparse.cpp"
+} break;
+case 47: {
+#line 334 "rlparse.kl"
+
+ (&redLel->user.term)->term = new Term( (&rhs[0]->user.term)->term, (&rhs[1]->user.factor_with_label)->factorWithAug );
+
+
+#line 4307 "rlparse.cpp"
+} break;
+case 48: {
+#line 338 "rlparse.kl"
+
+ (&redLel->user.term)->term = new Term( (&rhs[0]->user.term)->term, (&rhs[2]->user.factor_with_label)->factorWithAug );
+
+
+#line 4315 "rlparse.cpp"
+} break;
+case 49: {
+#line 342 "rlparse.kl"
+
+ (&redLel->user.term)->term = new Term( (&rhs[0]->user.term)->term, (&rhs[2]->user.factor_with_label)->factorWithAug, Term::RightStartType );
+
+
+#line 4323 "rlparse.cpp"
+} break;
+case 50: {
+#line 346 "rlparse.kl"
+
+ (&redLel->user.term)->term = new Term( (&rhs[0]->user.term)->term, (&rhs[2]->user.factor_with_label)->factorWithAug, Term::RightFinishType );
+
+
+#line 4331 "rlparse.cpp"
+} break;
+case 51: {
+#line 350 "rlparse.kl"
+
+ (&redLel->user.term)->term = new Term( (&rhs[0]->user.term)->term,
+ (&rhs[2]->user.factor_with_label)->factorWithAug, Term::LeftType );
+
+
+#line 4340 "rlparse.cpp"
+} break;
+case 52: {
+#line 355 "rlparse.kl"
+
+ (&redLel->user.term)->term = new Term( (&rhs[0]->user.factor_with_label)->factorWithAug );
+
+
+#line 4348 "rlparse.cpp"
+} break;
+case 53: {
+#line 365 "rlparse.kl"
+
+ /* Add the label to the list and pass the factor up. */
+ (&rhs[2]->user.factor_with_label)->factorWithAug->labels.prepend( Label((&rhs[0]->user.token)->loc, (&rhs[0]->user.token)->data) );
+ (&redLel->user.factor_with_label)->factorWithAug = (&rhs[2]->user.factor_with_label)->factorWithAug;
+
+
+#line 4358 "rlparse.cpp"
+} break;
+case 54: {
+#line 371 "rlparse.kl"
+
+ (&redLel->user.factor_with_label)->factorWithAug = (&rhs[0]->user.factor_with_ep)->factorWithAug;
+
+
+#line 4366 "rlparse.cpp"
+} break;
+case 55: {
+#line 381 "rlparse.kl"
+
+ /* Add the target to the list and return the factor object. */
+ (&rhs[0]->user.factor_with_ep)->factorWithAug->epsilonLinks.append( EpsilonLink( (&rhs[1]->user.token)->loc, nameRef ) );
+ (&redLel->user.factor_with_ep)->factorWithAug = (&rhs[0]->user.factor_with_ep)->factorWithAug;
+
+
+#line 4376 "rlparse.cpp"
+} break;
+case 56: {
+#line 387 "rlparse.kl"
+
+ (&redLel->user.factor_with_ep)->factorWithAug = (&rhs[0]->user.factor_with_aug)->factorWithAug;
+
+
+#line 4384 "rlparse.cpp"
+} break;
+case 57: {
+#line 397 "rlparse.kl"
+
+ /* Append the action to the factorWithAug, record the reference from
+ * factorWithAug to the action and pass up the factorWithAug. */
+ (&rhs[0]->user.factor_with_aug)->factorWithAug->actions.append(
+ ParserAction( (&rhs[1]->user.aug_type)->loc, (&rhs[1]->user.aug_type)->augType, 0, (&rhs[2]->user.action_ref)->action ) );
+ (&redLel->user.factor_with_aug)->factorWithAug = (&rhs[0]->user.factor_with_aug)->factorWithAug;
+
+
+#line 4396 "rlparse.cpp"
+} break;
+case 58: {
+#line 405 "rlparse.kl"
+
+ /* Append the named priority to the factorWithAug and pass it up. */
+ (&rhs[0]->user.factor_with_aug)->factorWithAug->priorityAugs.append(
+ PriorityAug( (&rhs[1]->user.aug_type)->augType, pd->curDefPriorKey, (&rhs[2]->user.priority_aug)->priorityNum ) );
+ (&redLel->user.factor_with_aug)->factorWithAug = (&rhs[0]->user.factor_with_aug)->factorWithAug;
+
+
+#line 4407 "rlparse.cpp"
+} break;
+case 59: {
+#line 412 "rlparse.kl"
+
+ /* Append the priority using a default name. */
+ (&rhs[0]->user.factor_with_aug)->factorWithAug->priorityAugs.append(
+ PriorityAug( (&rhs[1]->user.aug_type)->augType, (&rhs[3]->user.priority_name)->priorityName, (&rhs[5]->user.priority_aug)->priorityNum ) );
+ (&redLel->user.factor_with_aug)->factorWithAug = (&rhs[0]->user.factor_with_aug)->factorWithAug;
+
+
+#line 4418 "rlparse.cpp"
+} break;
+case 60: {
+#line 419 "rlparse.kl"
+
+ (&rhs[0]->user.factor_with_aug)->factorWithAug->conditions.append( ParserAction( (&rhs[1]->user.aug_type)->loc,
+ (&rhs[1]->user.aug_type)->augType, 0, (&rhs[2]->user.action_ref)->action ) );
+ (&redLel->user.factor_with_aug)->factorWithAug = (&rhs[0]->user.factor_with_aug)->factorWithAug;
+
+
+#line 4428 "rlparse.cpp"
+} break;
+case 61: {
+#line 425 "rlparse.kl"
+
+ /* Append the action, pass it up. */
+ (&rhs[0]->user.factor_with_aug)->factorWithAug->actions.append( ParserAction( (&rhs[1]->user.aug_type)->loc,
+ (&rhs[1]->user.aug_type)->augType, 0, (&rhs[2]->user.action_ref)->action ) );
+ (&redLel->user.factor_with_aug)->factorWithAug = (&rhs[0]->user.factor_with_aug)->factorWithAug;
+
+
+#line 4439 "rlparse.cpp"
+} break;
+case 62: {
+#line 432 "rlparse.kl"
+
+ /* Append the action, pass it up. */
+ (&rhs[0]->user.factor_with_aug)->factorWithAug->actions.append( ParserAction( (&rhs[1]->user.aug_type)->loc,
+ (&rhs[1]->user.aug_type)->augType, 0, (&rhs[2]->user.action_ref)->action ) );
+ (&redLel->user.factor_with_aug)->factorWithAug = (&rhs[0]->user.factor_with_aug)->factorWithAug;
+
+
+#line 4450 "rlparse.cpp"
+} break;
+case 63: {
+#line 439 "rlparse.kl"
+
+ /* Append the action, pass it up. */
+ (&rhs[0]->user.factor_with_aug)->factorWithAug->actions.append( ParserAction( (&rhs[1]->user.aug_type)->loc,
+ (&rhs[1]->user.aug_type)->augType, 0, (&rhs[2]->user.action_ref)->action ) );
+ (&redLel->user.factor_with_aug)->factorWithAug = (&rhs[0]->user.factor_with_aug)->factorWithAug;
+
+
+#line 4461 "rlparse.cpp"
+} break;
+case 64: {
+#line 446 "rlparse.kl"
+
+ /* Append the action to the factorWithAug, record the reference from
+ * factorWithAug to the action and pass up the factorWithAug. */
+ (&rhs[0]->user.factor_with_aug)->factorWithAug->actions.append( ParserAction( (&rhs[1]->user.aug_type)->loc,
+ (&rhs[1]->user.aug_type)->augType, pd->curDefLocalErrKey, (&rhs[2]->user.action_ref)->action ) );
+ (&redLel->user.factor_with_aug)->factorWithAug = (&rhs[0]->user.factor_with_aug)->factorWithAug;
+
+
+#line 4473 "rlparse.cpp"
+} break;
+case 65: {
+#line 454 "rlparse.kl"
+
+ /* Append the action to the factorWithAug, record the reference from
+ * factorWithAug to the action and pass up the factorWithAug. */
+ (&rhs[0]->user.factor_with_aug)->factorWithAug->actions.append( ParserAction( (&rhs[1]->user.aug_type)->loc,
+ (&rhs[1]->user.aug_type)->augType, pd->curDefLocalErrKey, (&rhs[2]->user.action_ref)->action ) );
+ (&redLel->user.factor_with_aug)->factorWithAug = (&rhs[0]->user.factor_with_aug)->factorWithAug;
+
+
+#line 4485 "rlparse.cpp"
+} break;
+case 66: {
+#line 462 "rlparse.kl"
+
+ /* Append the action to the factorWithAug, record the reference from
+ * factorWithAug to the action and pass up the factorWithAug. */
+ (&rhs[0]->user.factor_with_aug)->factorWithAug->actions.append( ParserAction( (&rhs[1]->user.aug_type)->loc,
+ (&rhs[1]->user.aug_type)->augType, (&rhs[3]->user.local_err_name)->error_name, (&rhs[5]->user.action_ref)->action ) );
+ (&redLel->user.factor_with_aug)->factorWithAug = (&rhs[0]->user.factor_with_aug)->factorWithAug;
+
+
+#line 4497 "rlparse.cpp"
+} break;
+case 67: {
+#line 470 "rlparse.kl"
+
+ (&redLel->user.factor_with_aug)->factorWithAug = new FactorWithAug( (&rhs[0]->user.factor_with_rep)->factorWithRep );
+
+
+#line 4505 "rlparse.cpp"
+} break;
+case 68: {
+#line 483 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_finish;
+
+#line 4511 "rlparse.cpp"
+} break;
+case 69: {
+#line 484 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_leave;
+
+#line 4517 "rlparse.cpp"
+} break;
+case 70: {
+#line 485 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all;
+
+#line 4523 "rlparse.cpp"
+} break;
+case 71: {
+#line 486 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start;
+
+#line 4529 "rlparse.cpp"
+} break;
+case 72: {
+#line 491 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start;
+
+#line 4535 "rlparse.cpp"
+} break;
+case 73: {
+#line 492 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start;
+
+#line 4541 "rlparse.cpp"
+} break;
+case 74: {
+#line 493 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all;
+
+#line 4547 "rlparse.cpp"
+} break;
+case 75: {
+#line 494 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all;
+
+#line 4553 "rlparse.cpp"
+} break;
+case 76: {
+#line 495 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_leave;
+
+#line 4559 "rlparse.cpp"
+} break;
+case 77: {
+#line 496 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_leave;
+
+#line 4565 "rlparse.cpp"
+} break;
+case 78: {
+#line 497 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all;
+
+#line 4571 "rlparse.cpp"
+} break;
+case 79: {
+#line 506 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start_to_state;
+
+#line 4577 "rlparse.cpp"
+} break;
+case 80: {
+#line 508 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start_to_state;
+
+#line 4583 "rlparse.cpp"
+} break;
+case 81: {
+#line 511 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_start_to_state;
+
+#line 4589 "rlparse.cpp"
+} break;
+case 82: {
+#line 513 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_start_to_state;
+
+#line 4595 "rlparse.cpp"
+} break;
+case 83: {
+#line 516 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all_to_state;
+
+#line 4601 "rlparse.cpp"
+} break;
+case 84: {
+#line 518 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all_to_state;
+
+#line 4607 "rlparse.cpp"
+} break;
+case 85: {
+#line 521 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_final_to_state;
+
+#line 4613 "rlparse.cpp"
+} break;
+case 86: {
+#line 523 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_final_to_state;
+
+#line 4619 "rlparse.cpp"
+} break;
+case 87: {
+#line 526 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_final_to_state;
+
+#line 4625 "rlparse.cpp"
+} break;
+case 88: {
+#line 528 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_final_to_state;
+
+#line 4631 "rlparse.cpp"
+} break;
+case 89: {
+#line 531 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_middle_to_state;
+
+#line 4637 "rlparse.cpp"
+} break;
+case 90: {
+#line 533 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_middle_to_state;
+
+#line 4643 "rlparse.cpp"
+} break;
+case 91: {
+#line 542 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start_from_state;
+
+#line 4649 "rlparse.cpp"
+} break;
+case 92: {
+#line 544 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start_from_state;
+
+#line 4655 "rlparse.cpp"
+} break;
+case 93: {
+#line 547 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_start_from_state;
+
+#line 4661 "rlparse.cpp"
+} break;
+case 94: {
+#line 549 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_start_from_state;
+
+#line 4667 "rlparse.cpp"
+} break;
+case 95: {
+#line 552 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all_from_state;
+
+#line 4673 "rlparse.cpp"
+} break;
+case 96: {
+#line 554 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all_from_state;
+
+#line 4679 "rlparse.cpp"
+} break;
+case 97: {
+#line 557 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_final_from_state;
+
+#line 4685 "rlparse.cpp"
+} break;
+case 98: {
+#line 559 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_final_from_state;
+
+#line 4691 "rlparse.cpp"
+} break;
+case 99: {
+#line 562 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_final_from_state;
+
+#line 4697 "rlparse.cpp"
+} break;
+case 100: {
+#line 564 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_final_from_state;
+
+#line 4703 "rlparse.cpp"
+} break;
+case 101: {
+#line 567 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_middle_from_state;
+
+#line 4709 "rlparse.cpp"
+} break;
+case 102: {
+#line 569 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_middle_from_state;
+
+#line 4715 "rlparse.cpp"
+} break;
+case 103: {
+#line 578 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start_eof;
+
+#line 4721 "rlparse.cpp"
+} break;
+case 104: {
+#line 580 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start_eof;
+
+#line 4727 "rlparse.cpp"
+} break;
+case 105: {
+#line 583 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_start_eof;
+
+#line 4733 "rlparse.cpp"
+} break;
+case 106: {
+#line 585 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_start_eof;
+
+#line 4739 "rlparse.cpp"
+} break;
+case 107: {
+#line 588 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all_eof;
+
+#line 4745 "rlparse.cpp"
+} break;
+case 108: {
+#line 590 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all_eof;
+
+#line 4751 "rlparse.cpp"
+} break;
+case 109: {
+#line 593 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_final_eof;
+
+#line 4757 "rlparse.cpp"
+} break;
+case 110: {
+#line 595 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_final_eof;
+
+#line 4763 "rlparse.cpp"
+} break;
+case 111: {
+#line 598 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_final_eof;
+
+#line 4769 "rlparse.cpp"
+} break;
+case 112: {
+#line 600 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_final_eof;
+
+#line 4775 "rlparse.cpp"
+} break;
+case 113: {
+#line 603 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_middle_eof;
+
+#line 4781 "rlparse.cpp"
+} break;
+case 114: {
+#line 605 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_middle_eof;
+
+#line 4787 "rlparse.cpp"
+} break;
+case 115: {
+#line 614 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start_gbl_error;
+
+#line 4793 "rlparse.cpp"
+} break;
+case 116: {
+#line 616 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start_gbl_error;
+
+#line 4799 "rlparse.cpp"
+} break;
+case 117: {
+#line 619 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_start_gbl_error;
+
+#line 4805 "rlparse.cpp"
+} break;
+case 118: {
+#line 621 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_start_gbl_error;
+
+#line 4811 "rlparse.cpp"
+} break;
+case 119: {
+#line 624 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all_gbl_error;
+
+#line 4817 "rlparse.cpp"
+} break;
+case 120: {
+#line 626 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all_gbl_error;
+
+#line 4823 "rlparse.cpp"
+} break;
+case 121: {
+#line 629 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_final_gbl_error;
+
+#line 4829 "rlparse.cpp"
+} break;
+case 122: {
+#line 631 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_final_gbl_error;
+
+#line 4835 "rlparse.cpp"
+} break;
+case 123: {
+#line 634 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_final_gbl_error;
+
+#line 4841 "rlparse.cpp"
+} break;
+case 124: {
+#line 636 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_final_gbl_error;
+
+#line 4847 "rlparse.cpp"
+} break;
+case 125: {
+#line 639 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_middle_gbl_error;
+
+#line 4853 "rlparse.cpp"
+} break;
+case 126: {
+#line 641 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_middle_gbl_error;
+
+#line 4859 "rlparse.cpp"
+} break;
+case 127: {
+#line 651 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start_local_error;
+
+#line 4865 "rlparse.cpp"
+} break;
+case 128: {
+#line 653 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start_local_error;
+
+#line 4871 "rlparse.cpp"
+} break;
+case 129: {
+#line 656 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_start_local_error;
+
+#line 4877 "rlparse.cpp"
+} break;
+case 130: {
+#line 658 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_start_local_error;
+
+#line 4883 "rlparse.cpp"
+} break;
+case 131: {
+#line 661 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all_local_error;
+
+#line 4889 "rlparse.cpp"
+} break;
+case 132: {
+#line 663 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all_local_error;
+
+#line 4895 "rlparse.cpp"
+} break;
+case 133: {
+#line 666 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_final_local_error;
+
+#line 4901 "rlparse.cpp"
+} break;
+case 134: {
+#line 668 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_final_local_error;
+
+#line 4907 "rlparse.cpp"
+} break;
+case 135: {
+#line 671 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_final_local_error;
+
+#line 4913 "rlparse.cpp"
+} break;
+case 136: {
+#line 673 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_final_local_error;
+
+#line 4919 "rlparse.cpp"
+} break;
+case 137: {
+#line 676 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_middle_local_error;
+
+#line 4925 "rlparse.cpp"
+} break;
+case 138: {
+#line 678 "rlparse.kl"
+ (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_middle_local_error;
+
+#line 4931 "rlparse.cpp"
+} break;
+case 139: {
+#line 691 "rlparse.kl"
+ (&redLel->user.action_ref)->action = (&rhs[0]->user.action_ref)->action;
+
+#line 4937 "rlparse.cpp"
+} break;
+case 140: {
+#line 692 "rlparse.kl"
+ (&redLel->user.action_ref)->action = (&rhs[1]->user.action_ref)->action;
+
+#line 4943 "rlparse.cpp"
+} break;
+case 141: {
+#line 693 "rlparse.kl"
+ (&redLel->user.action_ref)->action = (&rhs[0]->user.action_ref)->action;
+
+#line 4949 "rlparse.cpp"
+} break;
+case 142: {
+#line 698 "rlparse.kl"
+
+ /* Set the name in the actionDict. */
+ Action *action = pd->actionDict.find( (&rhs[0]->user.token)->data );
+ if ( action != 0 ) {
+ /* Pass up the action element */
+ (&redLel->user.action_ref)->action = action;
+ }
+ else {
+ /* Will recover by returning null as the action. */
+ error((&rhs[0]->user.token)->loc) << "action lookup of \"" << (&rhs[0]->user.token)->data << "\" failed" << endl;
+ (&redLel->user.action_ref)->action = 0;
+ }
+
+
+#line 4967 "rlparse.cpp"
+} break;
+case 143: {
+#line 715 "rlparse.kl"
+
+ /* Create the action, add it to the list and pass up. */
+ Action *newAction = new Action( (&rhs[0]->user.token)->loc, 0, (&rhs[1]->user.inline_list)->inlineList, pd->nextCondId++ );
+ pd->actionList.append( newAction );
+ (&redLel->user.action_ref)->action = newAction;
+
+
+#line 4978 "rlparse.cpp"
+} break;
+case 144: {
+#line 730 "rlparse.kl"
+
+ // Lookup/create the priority key.
+ PriorDictEl *priorDictEl;
+ if ( pd->priorDict.insert( (&rhs[0]->user.token)->data, pd->nextPriorKey, &priorDictEl ) )
+ pd->nextPriorKey += 1;
+
+ // Use the inserted/found priority key.
+ (&redLel->user.priority_name)->priorityName = priorDictEl->value;
+
+
+#line 4992 "rlparse.cpp"
+} break;
+case 145: {
+#line 747 "rlparse.kl"
+
+ // Convert the priority number to a long. Check for overflow.
+ errno = 0;
+ //cerr << "PRIOR AUG: " << $1->token.data << endl;
+ long aug = strtol( (&rhs[0]->user.token_type)->token.data, 0, 10 );
+ if ( errno == ERANGE && aug == LONG_MAX ) {
+ /* Priority number too large. Recover by setting the priority to 0. */
+ error((&rhs[0]->user.token_type)->token.loc) << "priority number " << (&rhs[0]->user.token_type)->token.data <<
+ " overflows" << endl;
+ (&redLel->user.priority_aug)->priorityNum = 0;
+ }
+ else if ( errno == ERANGE && aug == LONG_MIN ) {
+ /* Priority number too large in the neg. Recover by using 0. */
+ error((&rhs[0]->user.token_type)->token.loc) << "priority number " << (&rhs[0]->user.token_type)->token.data <<
+ " underflows" << endl;
+ (&redLel->user.priority_aug)->priorityNum = 0;
+ }
+ else {
+ /* No overflow or underflow. */
+ (&redLel->user.priority_aug)->priorityNum = aug;
+ }
+
+
+#line 5019 "rlparse.cpp"
+} break;
+case 146: {
+#line 773 "rlparse.kl"
+
+ (&redLel->user.token_type)->token = *(&rhs[0]->user.token);
+
+
+#line 5027 "rlparse.cpp"
+} break;
+case 147: {
+#line 777 "rlparse.kl"
+
+ (&redLel->user.token_type)->token.set( "+", 1 );
+ (&redLel->user.token_type)->token.loc = (&rhs[0]->user.token)->loc;
+ (&redLel->user.token_type)->token.append( *(&rhs[1]->user.token) );
+
+
+#line 5037 "rlparse.cpp"
+} break;
+case 148: {
+#line 783 "rlparse.kl"
+
+ (&redLel->user.token_type)->token.set( "-", 1 );
+ (&redLel->user.token_type)->token.loc = (&rhs[0]->user.token)->loc;
+ (&redLel->user.token_type)->token.append( *(&rhs[1]->user.token) );
+
+
+#line 5047 "rlparse.cpp"
+} break;
+case 149: {
+#line 795 "rlparse.kl"
+
+ /* Lookup/create the priority key. */
+ LocalErrDictEl *localErrDictEl;
+ if ( pd->localErrDict.insert( (&rhs[0]->user.token)->data, pd->nextLocalErrKey, &localErrDictEl ) )
+ pd->nextLocalErrKey += 1;
+
+ /* Use the inserted/found priority key. */
+ (&redLel->user.local_err_name)->error_name = localErrDictEl->value;
+
+
+#line 5061 "rlparse.cpp"
+} break;
+case 150: {
+#line 816 "rlparse.kl"
+
+ (&redLel->user.factor_with_rep)->factorWithRep = new FactorWithRep( (&rhs[1]->user.token)->loc, (&rhs[0]->user.factor_with_rep)->factorWithRep,
+ 0, 0, FactorWithRep::StarType );
+
+
+#line 5070 "rlparse.cpp"
+} break;
+case 151: {
+#line 821 "rlparse.kl"
+
+ (&redLel->user.factor_with_rep)->factorWithRep = new FactorWithRep( (&rhs[1]->user.token)->loc, (&rhs[0]->user.factor_with_rep)->factorWithRep,
+ 0, 0, FactorWithRep::StarStarType );
+
+
+#line 5079 "rlparse.cpp"
+} break;
+case 152: {
+#line 826 "rlparse.kl"
+
+ (&redLel->user.factor_with_rep)->factorWithRep = new FactorWithRep( (&rhs[1]->user.token)->loc, (&rhs[0]->user.factor_with_rep)->factorWithRep,
+ 0, 0, FactorWithRep::OptionalType );
+
+
+#line 5088 "rlparse.cpp"
+} break;
+case 153: {
+#line 831 "rlparse.kl"
+
+ (&redLel->user.factor_with_rep)->factorWithRep = new FactorWithRep( (&rhs[1]->user.token)->loc, (&rhs[0]->user.factor_with_rep)->factorWithRep,
+ 0, 0, FactorWithRep::PlusType );
+
+
+#line 5097 "rlparse.cpp"
+} break;
+case 154: {
+#line 836 "rlparse.kl"
+
+ (&redLel->user.factor_with_rep)->factorWithRep = new FactorWithRep( (&rhs[1]->user.token)->loc, (&rhs[0]->user.factor_with_rep)->factorWithRep,
+ (&rhs[2]->user.factor_rep_num)->rep, 0, FactorWithRep::ExactType );
+
+
+#line 5106 "rlparse.cpp"
+} break;
+case 155: {
+#line 841 "rlparse.kl"
+
+ (&redLel->user.factor_with_rep)->factorWithRep = new FactorWithRep( (&rhs[1]->user.token)->loc, (&rhs[0]->user.factor_with_rep)->factorWithRep,
+ 0, (&rhs[3]->user.factor_rep_num)->rep, FactorWithRep::MaxType );
+
+
+#line 5115 "rlparse.cpp"
+} break;
+case 156: {
+#line 846 "rlparse.kl"
+
+ (&redLel->user.factor_with_rep)->factorWithRep = new FactorWithRep( (&rhs[1]->user.token)->loc, (&rhs[0]->user.factor_with_rep)->factorWithRep,
+ (&rhs[2]->user.factor_rep_num)->rep, 0, FactorWithRep::MinType );
+
+
+#line 5124 "rlparse.cpp"
+} break;
+case 157: {
+#line 851 "rlparse.kl"
+
+ (&redLel->user.factor_with_rep)->factorWithRep = new FactorWithRep( (&rhs[1]->user.token)->loc, (&rhs[0]->user.factor_with_rep)->factorWithRep,
+ (&rhs[2]->user.factor_rep_num)->rep, (&rhs[4]->user.factor_rep_num)->rep, FactorWithRep::RangeType );
+
+
+#line 5133 "rlparse.cpp"
+} break;
+case 158: {
+#line 856 "rlparse.kl"
+
+ (&redLel->user.factor_with_rep)->factorWithRep = new FactorWithRep( (&rhs[0]->user.factor_with_neg)->factorWithNeg );
+
+
+#line 5141 "rlparse.cpp"
+} break;
+case 159: {
+#line 866 "rlparse.kl"
+
+ // Convert the priority number to a long. Check for overflow.
+ errno = 0;
+ long rep = strtol( (&rhs[0]->user.token)->data, 0, 10 );
+ if ( errno == ERANGE && rep == LONG_MAX ) {
+ // Repetition too large. Recover by returing repetition 1. */
+ error((&rhs[0]->user.token)->loc) << "repetition number " << (&rhs[0]->user.token)->data << " overflows" << endl;
+ (&redLel->user.factor_rep_num)->rep = 1;
+ }
+ else {
+ // Cannot be negative, so no overflow.
+ (&redLel->user.factor_rep_num)->rep = rep;
+ }
+
+
+#line 5160 "rlparse.cpp"
+} break;
+case 160: {
+#line 892 "rlparse.kl"
+
+ (&redLel->user.factor_with_neg)->factorWithNeg = new FactorWithNeg( (&rhs[0]->user.token)->loc,
+ (&rhs[1]->user.factor_with_neg)->factorWithNeg, FactorWithNeg::NegateType );
+
+
+#line 5169 "rlparse.cpp"
+} break;
+case 161: {
+#line 897 "rlparse.kl"
+
+ (&redLel->user.factor_with_neg)->factorWithNeg = new FactorWithNeg( (&rhs[0]->user.token)->loc,
+ (&rhs[1]->user.factor_with_neg)->factorWithNeg, FactorWithNeg::CharNegateType );
+
+
+#line 5178 "rlparse.cpp"
+} break;
+case 162: {
+#line 902 "rlparse.kl"
+
+ (&redLel->user.factor_with_neg)->factorWithNeg = new FactorWithNeg( (&rhs[0]->user.factor)->factor );
+
+
+#line 5186 "rlparse.cpp"
+} break;
+case 163: {
+#line 912 "rlparse.kl"
+
+ /* Create a new factor node going to a concat literal. */
+ (&redLel->user.factor)->factor = new Factor( new Literal( *(&rhs[0]->user.token), Literal::LitString ) );
+
+
+#line 5195 "rlparse.cpp"
+} break;
+case 164: {
+#line 917 "rlparse.kl"
+
+ /* Create a new factor node going to a literal number. */
+ (&redLel->user.factor)->factor = new Factor( new Literal( (&rhs[0]->user.token_type)->token, Literal::Number ) );
+
+
+#line 5204 "rlparse.cpp"
+} break;
+case 165: {
+#line 922 "rlparse.kl"
+
+ /* Find the named graph. */
+ GraphDictEl *gdNode = pd->graphDict.find( (&rhs[0]->user.token)->data );
+ if ( gdNode == 0 ) {
+ /* Recover by returning null as the factor node. */
+ error((&rhs[0]->user.token)->loc) << "graph lookup of \"" << (&rhs[0]->user.token)->data << "\" failed" << endl;
+ (&redLel->user.factor)->factor = 0;
+ }
+ else if ( gdNode->isInstance ) {
+ /* Recover by retuning null as the factor node. */
+ error((&rhs[0]->user.token)->loc) << "references to graph instantiations not allowed "
+ "in expressions" << endl;
+ (&redLel->user.factor)->factor = 0;
+ }
+ else {
+ /* Create a factor node that is a lookup of an expression. */
+ (&redLel->user.factor)->factor = new Factor( (&rhs[0]->user.token)->loc, gdNode->value );
+ }
+
+
+#line 5228 "rlparse.cpp"
+} break;
+case 166: {
+#line 942 "rlparse.kl"
+
+ /* Create a new factor node going to an OR expression. */
+ (&redLel->user.factor)->factor = new Factor( new ReItem( (&rhs[0]->user.token)->loc, (&rhs[1]->user.regular_expr_or_data)->reOrBlock, ReItem::OrBlock ) );
+
+
+#line 5237 "rlparse.cpp"
+} break;
+case 167: {
+#line 947 "rlparse.kl"
+
+ /* Create a new factor node going to a negated OR expression. */
+ (&redLel->user.factor)->factor = new Factor( new ReItem( (&rhs[0]->user.token)->loc, (&rhs[1]->user.regular_expr_or_data)->reOrBlock, ReItem::NegOrBlock ) );
+
+
+#line 5246 "rlparse.cpp"
+} break;
+case 168: {
+#line 952 "rlparse.kl"
+
+ if ( (&rhs[2]->user.token)->length > 1 ) {
+ for ( char *p = (&rhs[2]->user.token)->data; *p != 0; p++ ) {
+ if ( *p == 'i' )
+ (&rhs[1]->user.regular_expr)->regExpr->caseInsensitive = true;
+ }
+ }
+
+ /* Create a new factor node going to a regular exp. */
+ (&redLel->user.factor)->factor = new Factor( (&rhs[1]->user.regular_expr)->regExpr );
+
+
+#line 5262 "rlparse.cpp"
+} break;
+case 169: {
+#line 964 "rlparse.kl"
+
+ /* Create a new factor node going to a range. */
+ (&redLel->user.factor)->factor = new Factor( new Range( (&rhs[0]->user.range_lit)->literal, (&rhs[2]->user.range_lit)->literal ) );
+
+
+#line 5271 "rlparse.cpp"
+} break;
+case 170: {
+#line 969 "rlparse.kl"
+
+ /* Create a new factor going to a parenthesized join. */
+ (&redLel->user.factor)->factor = new Factor( (&rhs[1]->user.join)->join );
+
+
+#line 5280 "rlparse.cpp"
+} break;
+case 171: {
+#line 981 "rlparse.kl"
+
+ /* Range literas must have only one char. We restrict this in the parse tree. */
+ (&redLel->user.range_lit)->literal = new Literal( *(&rhs[0]->user.token), Literal::LitString );
+
+
+#line 5289 "rlparse.cpp"
+} break;
+case 172: {
+#line 986 "rlparse.kl"
+
+ /* Create a new literal number. */
+ (&redLel->user.range_lit)->literal = new Literal( (&rhs[0]->user.token_type)->token, Literal::Number );
+
+
+#line 5298 "rlparse.cpp"
+} break;
+case 173: {
+#line 995 "rlparse.kl"
+
+ (&redLel->user.token_type)->token = *(&rhs[0]->user.token);
+
+
+#line 5306 "rlparse.cpp"
+} break;
+case 174: {
+#line 999 "rlparse.kl"
+
+ (&redLel->user.token_type)->token.set( "-", 1 );
+ (&redLel->user.token_type)->token.loc = (&rhs[0]->user.token)->loc;
+ (&redLel->user.token_type)->token.append( *(&rhs[1]->user.token) );
+
+
+#line 5316 "rlparse.cpp"
+} break;
+case 175: {
+#line 1005 "rlparse.kl"
+
+ (&redLel->user.token_type)->token = *(&rhs[0]->user.token);
+
+
+#line 5324 "rlparse.cpp"
+} break;
+case 176: {
+#line 1020 "rlparse.kl"
+
+ /* An optimization to lessen the tree size. If a non-starred char is
+ * directly under the left side on the right and the right side is
+ * another non-starred char then paste them together and return the
+ * left side. Otherwise just put the two under a new reg exp node. */
+ if ( (&rhs[1]->user.regular_expr_item)->reItem->type == ReItem::Data && !(&rhs[1]->user.regular_expr_item)->reItem->star &&
+ (&rhs[0]->user.regular_expr)->regExpr->type == RegExpr::RecurseItem &&
+ (&rhs[0]->user.regular_expr)->regExpr->item->type == ReItem::Data && !(&rhs[0]->user.regular_expr)->regExpr->item->star )
+ {
+ /* Append the right side to the right side of the left and toss the
+ * right side. */
+ (&rhs[0]->user.regular_expr)->regExpr->item->token.append( (&rhs[1]->user.regular_expr_item)->reItem->token );
+ delete (&rhs[1]->user.regular_expr_item)->reItem;
+ (&redLel->user.regular_expr)->regExpr = (&rhs[0]->user.regular_expr)->regExpr;
+ }
+ else {
+ (&redLel->user.regular_expr)->regExpr = new RegExpr( (&rhs[0]->user.regular_expr)->regExpr, (&rhs[1]->user.regular_expr_item)->reItem );
+ }
+
+
+#line 5348 "rlparse.cpp"
+} break;
+case 177: {
+#line 1040 "rlparse.kl"
+
+ /* Can't optimize the tree. */
+ (&redLel->user.regular_expr)->regExpr = new RegExpr();
+
+
+#line 5357 "rlparse.cpp"
+} break;
+case 178: {
+#line 1052 "rlparse.kl"
+
+ (&rhs[0]->user.regular_expr_char)->reItem->star = true;
+ (&redLel->user.regular_expr_item)->reItem = (&rhs[0]->user.regular_expr_char)->reItem;
+
+
+#line 5366 "rlparse.cpp"
+} break;
+case 179: {
+#line 1057 "rlparse.kl"
+
+ (&redLel->user.regular_expr_item)->reItem = (&rhs[0]->user.regular_expr_char)->reItem;
+
+
+#line 5374 "rlparse.cpp"
+} break;
+case 180: {
+#line 1069 "rlparse.kl"
+
+ (&redLel->user.regular_expr_char)->reItem = new ReItem( (&rhs[0]->user.token)->loc, (&rhs[1]->user.regular_expr_or_data)->reOrBlock, ReItem::OrBlock );
+
+
+#line 5382 "rlparse.cpp"
+} break;
+case 181: {
+#line 1073 "rlparse.kl"
+
+ (&redLel->user.regular_expr_char)->reItem = new ReItem( (&rhs[0]->user.token)->loc, (&rhs[1]->user.regular_expr_or_data)->reOrBlock, ReItem::NegOrBlock );
+
+
+#line 5390 "rlparse.cpp"
+} break;
+case 182: {
+#line 1077 "rlparse.kl"
+
+ (&redLel->user.regular_expr_char)->reItem = new ReItem( (&rhs[0]->user.token)->loc, ReItem::Dot );
+
+
+#line 5398 "rlparse.cpp"
+} break;
+case 183: {
+#line 1081 "rlparse.kl"
+
+ (&redLel->user.regular_expr_char)->reItem = new ReItem( (&rhs[0]->user.token)->loc, *(&rhs[0]->user.token) );
+
+
+#line 5406 "rlparse.cpp"
+} break;
+case 184: {
+#line 1093 "rlparse.kl"
+
+ /* An optimization to lessen the tree size. If an or char is directly
+ * under the left side on the right and the right side is another or
+ * char then paste them together and return the left side. Otherwise
+ * just put the two under a new or data node. */
+ if ( (&rhs[1]->user.regular_expr_or_char)->reOrItem->type == ReOrItem::Data &&
+ (&rhs[0]->user.regular_expr_or_data)->reOrBlock->type == ReOrBlock::RecurseItem &&
+ (&rhs[0]->user.regular_expr_or_data)->reOrBlock->item->type == ReOrItem::Data )
+ {
+ /* Append the right side to right side of the left and toss the
+ * right side. */
+ (&rhs[0]->user.regular_expr_or_data)->reOrBlock->item->token.append( (&rhs[1]->user.regular_expr_or_char)->reOrItem->token );
+ delete (&rhs[1]->user.regular_expr_or_char)->reOrItem;
+ (&redLel->user.regular_expr_or_data)->reOrBlock = (&rhs[0]->user.regular_expr_or_data)->reOrBlock;
+ }
+ else {
+ /* Can't optimize, put the left and right under a new node. */
+ (&redLel->user.regular_expr_or_data)->reOrBlock = new ReOrBlock( (&rhs[0]->user.regular_expr_or_data)->reOrBlock, (&rhs[1]->user.regular_expr_or_char)->reOrItem );
+ }
+
+
+#line 5431 "rlparse.cpp"
+} break;
+case 185: {
+#line 1114 "rlparse.kl"
+
+ (&redLel->user.regular_expr_or_data)->reOrBlock = new ReOrBlock();
+
+
+#line 5439 "rlparse.cpp"
+} break;
+case 186: {
+#line 1126 "rlparse.kl"
+
+ (&redLel->user.regular_expr_or_char)->reOrItem = new ReOrItem( (&rhs[0]->user.token)->loc, *(&rhs[0]->user.token) );
+
+
+#line 5447 "rlparse.cpp"
+} break;
+case 187: {
+#line 1130 "rlparse.kl"
+
+ (&redLel->user.regular_expr_or_char)->reOrItem = new ReOrItem( (&rhs[1]->user.token)->loc, (&rhs[0]->user.token)->data[0], (&rhs[2]->user.token)->data[0] );
+
+
+#line 5455 "rlparse.cpp"
+} break;
+case 188: {
+#line 1147 "rlparse.kl"
+
+ /* Append the item to the list, return the list. */
+ (&redLel->user.inline_list)->inlineList = (&rhs[0]->user.inline_list)->inlineList;
+ (&redLel->user.inline_list)->inlineList->append( (&rhs[1]->user.inline_item)->inlineItem );
+
+
+#line 5465 "rlparse.cpp"
+} break;
+case 189: {
+#line 1154 "rlparse.kl"
+
+ /* Start with empty list. */
+ (&redLel->user.inline_list)->inlineList = new InlineList;
+
+
+#line 5474 "rlparse.cpp"
+} break;
+case 190: {
+#line 1169 "rlparse.kl"
+
+ (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token_type)->token.loc, (&rhs[0]->user.token_type)->token.data, InlineItem::Text );
+
+
+#line 5482 "rlparse.cpp"
+} break;
+case 191: {
+#line 1175 "rlparse.kl"
+
+ (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token_type)->token.loc, (&rhs[0]->user.token_type)->token.data, InlineItem::Text );
+
+
+#line 5490 "rlparse.cpp"
+} break;
+case 192: {
+#line 1181 "rlparse.kl"
+
+ /* Pass the inline item up. */
+ (&redLel->user.inline_item)->inlineItem = (&rhs[0]->user.inline_item)->inlineItem;
+
+
+#line 5499 "rlparse.cpp"
+} break;
+case 193: {
+#line 1188 "rlparse.kl"
+ (&redLel->user.token_type)->token = *(&rhs[0]->user.token);
+
+#line 5505 "rlparse.cpp"
+} break;
+case 194: {
+#line 1189 "rlparse.kl"
+ (&redLel->user.token_type)->token = *(&rhs[0]->user.token);
+
+#line 5511 "rlparse.cpp"
+} break;
+case 195: {
+#line 1190 "rlparse.kl"
+ (&redLel->user.token_type)->token = *(&rhs[0]->user.token);
+
+#line 5517 "rlparse.cpp"
+} break;
+case 196: {
+#line 1191 "rlparse.kl"
+ (&redLel->user.token_type)->token = *(&rhs[0]->user.token);
+
+#line 5523 "rlparse.cpp"
+} break;
+case 197: {
+#line 1192 "rlparse.kl"
+ (&redLel->user.token_type)->token = *(&rhs[0]->user.token);
+
+#line 5529 "rlparse.cpp"
+} break;
+case 198: {
+#line 1193 "rlparse.kl"
+ (&redLel->user.token_type)->token = *(&rhs[0]->user.token);
+
+#line 5535 "rlparse.cpp"
+} break;
+case 199: {
+#line 1197 "rlparse.kl"
+
+ /* Pass up interpreted items of inline expressions. */
+ (&redLel->user.inline_item)->inlineItem = (&rhs[0]->user.inline_item)->inlineItem;
+
+
+#line 5544 "rlparse.cpp"
+} break;
+case 200: {
+#line 1202 "rlparse.kl"
+
+ (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, InlineItem::Hold );
+
+
+#line 5552 "rlparse.cpp"
+} break;
+case 201: {
+#line 1206 "rlparse.kl"
+
+ (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, InlineItem::Exec );
+ (&redLel->user.inline_item)->inlineItem->children = (&rhs[1]->user.inline_list)->inlineList;
+
+
+#line 5561 "rlparse.cpp"
+} break;
+case 202: {
+#line 1211 "rlparse.kl"
+
+ (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc,
+ new NameRef(nameRef), InlineItem::Goto );
+
+
+#line 5570 "rlparse.cpp"
+} break;
+case 203: {
+#line 1216 "rlparse.kl"
+
+ (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, InlineItem::GotoExpr );
+ (&redLel->user.inline_item)->inlineItem->children = (&rhs[2]->user.inline_list)->inlineList;
+
+
+#line 5579 "rlparse.cpp"
+} break;
+case 204: {
+#line 1221 "rlparse.kl"
+
+ (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, new NameRef(nameRef), InlineItem::Next );
+
+
+#line 5587 "rlparse.cpp"
+} break;
+case 205: {
+#line 1225 "rlparse.kl"
+
+ (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, InlineItem::NextExpr );
+ (&redLel->user.inline_item)->inlineItem->children = (&rhs[2]->user.inline_list)->inlineList;
+
+
+#line 5596 "rlparse.cpp"
+} break;
+case 206: {
+#line 1230 "rlparse.kl"
+
+ (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, new NameRef(nameRef), InlineItem::Call );
+
+
+#line 5604 "rlparse.cpp"
+} break;
+case 207: {
+#line 1234 "rlparse.kl"
+
+ (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, InlineItem::CallExpr );
+ (&redLel->user.inline_item)->inlineItem->children = (&rhs[2]->user.inline_list)->inlineList;
+
+
+#line 5613 "rlparse.cpp"
+} break;
+case 208: {
+#line 1239 "rlparse.kl"
+
+ (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, InlineItem::Ret );
+
+
+#line 5621 "rlparse.cpp"
+} break;
+case 209: {
+#line 1243 "rlparse.kl"
+
+ (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, InlineItem::Break );
+
+
+#line 5629 "rlparse.cpp"
+} break;
+case 210: {
+#line 1251 "rlparse.kl"
+
+ (&redLel->user.inline_list)->inlineList = (&rhs[0]->user.inline_list)->inlineList;
+ (&redLel->user.inline_list)->inlineList->append( (&rhs[1]->user.inline_item)->inlineItem );
+
+
+#line 5638 "rlparse.cpp"
+} break;
+case 211: {
+#line 1256 "rlparse.kl"
+
+ /* Init the list used for this expr. */
+ (&redLel->user.inline_list)->inlineList = new InlineList;
+
+
+#line 5647 "rlparse.cpp"
+} break;
+case 212: {
+#line 1265 "rlparse.kl"
+
+ /* Return a text segment. */
+ (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token_type)->token.loc, (&rhs[0]->user.token_type)->token.data, InlineItem::Text );
+
+
+#line 5656 "rlparse.cpp"
+} break;
+case 213: {
+#line 1271 "rlparse.kl"
+
+ /* Return a text segment, must heap alloc the text. */
+ (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token_type)->token.loc, (&rhs[0]->user.token_type)->token.data, InlineItem::Text );
+
+
+#line 5665 "rlparse.cpp"
+} break;
+case 214: {
+#line 1277 "rlparse.kl"
+
+ /* Pass the inline item up. */
+ (&redLel->user.inline_item)->inlineItem = (&rhs[0]->user.inline_item)->inlineItem;
+
+
+#line 5674 "rlparse.cpp"
+} break;
+case 227: {
+#line 1307 "rlparse.kl"
+
+ (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, InlineItem::PChar );
+
+
+#line 5682 "rlparse.cpp"
+} break;
+case 228: {
+#line 1312 "rlparse.kl"
+
+ (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, InlineItem::Char );
+
+
+#line 5690 "rlparse.cpp"
+} break;
+case 229: {
+#line 1317 "rlparse.kl"
+
+ (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, InlineItem::Curs );
+
+
+#line 5698 "rlparse.cpp"
+} break;
+case 230: {
+#line 1322 "rlparse.kl"
+
+ (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, InlineItem::Targs );
+
+
+#line 5706 "rlparse.cpp"
+} break;
+case 231: {
+#line 1327 "rlparse.kl"
+
+ (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc,
+ new NameRef(nameRef), InlineItem::Entry );
+
+
+#line 5715 "rlparse.cpp"
+} break;
+case 233: {
+#line 1338 "rlparse.kl"
+
+ nameRef.empty();
+
+
+#line 5723 "rlparse.cpp"
+} break;
+case 235: {
+#line 1348 "rlparse.kl"
+
+ /* Insert an initial null pointer val to indicate the existence of the
+ * initial name seperator. */
+ nameRef.setAs( 0 );
+
+
+#line 5733 "rlparse.cpp"
+} break;
+case 236: {
+#line 1354 "rlparse.kl"
+
+ nameRef.empty();
+
+
+#line 5741 "rlparse.cpp"
+} break;
+case 237: {
+#line 1361 "rlparse.kl"
+
+ nameRef.append( (&rhs[2]->user.token)->data );
+
+
+#line 5749 "rlparse.cpp"
+} break;
+case 238: {
+#line 1366 "rlparse.kl"
+
+ nameRef.append( (&rhs[0]->user.token)->data );
+
+
+#line 5757 "rlparse.cpp"
+} break;
+}
+ }
+}
+
+ if ( lel->child != 0 ) {
+ struct Parser_LangEl *first = lel->child;
+ struct Parser_LangEl *child = lel->child;
+ numNodes -= 1;
+ lel->child = 0;
+ while ( child->next != 0 ) {
+ child = child->next;
+ numNodes -= 1;
+ }
+ child->next = pool;
+ pool = first;
+ }
+ }
+
+hit_final:
+ if ( sp > 0 ) {
+ /* Figure out which place to return to. */
+ if ( cmStack[sp-1]->next == lel ) {
+ lel = cmStack[--sp];
+ goto final_reverse;
+ }
+ else {
+ lel = cmStack[--sp];
+ goto final_upwards;
+ }
+ }
+
+ lastFinal = lel;
+ free( cmStack );
+ }
+ }
+ }
+
+ if ( *action & 0x2 ) {
+ int fssRed = *action >> 2;
+ int reduction = Parser_fssProdIdIndex[fssRed];
+ struct Parser_LangEl *redLel;
+ if ( pool == 0 ) {
+ if ( freshPos == 8128 ) {
+ freshEl = (struct Parser_LangEl*) malloc(
+ sizeof(struct Parser_LangEl)*8128);
+ #ifdef LOG_ACTIONS
+ cerr << "allocating 8128 LangEls" << endl;
+ #endif
+ freshPos = 0;
+ }
+ redLel = freshEl + freshPos++;
+ }
+ else {
+ redLel = pool;
+ pool = pool->next;
+ }
+ numNodes += 1;
+ redLel->type = Parser_prodLhsIds[reduction];
+ redLel->reduction = reduction;
+ redLel->child = 0;
+ redLel->next = 0;
+ redLel->retry = (lel->retry << 16);
+ lel->retry &= 0xffff0000;
+
+ rhsLen = Parser_fssProdLengths[fssRed];
+ if ( rhsLen > 0 ) {
+ int r;
+ for ( r = rhsLen-1; r > 0; r-- ) {
+ rhs[r] = stackTop;
+ stackTop = stackTop->next;
+ }
+ rhs[0] = stackTop;
+ stackTop = stackTop->next;
+ rhs[0]->next = 0;
+ }
+switch ( reduction ) {
+case 215: {
+#line 1284 "rlparse.kl"
+ (&redLel->user.token_type)->token = *(&rhs[0]->user.token);
+
+#line 5839 "rlparse.cpp"
+} break;
+case 216: {
+#line 1285 "rlparse.kl"
+ (&redLel->user.token_type)->token = *(&rhs[0]->user.token);
+
+#line 5845 "rlparse.cpp"
+} break;
+case 217: {
+#line 1286 "rlparse.kl"
+ (&redLel->user.token_type)->token = *(&rhs[0]->user.token);
+
+#line 5851 "rlparse.cpp"
+} break;
+case 218: {
+#line 1287 "rlparse.kl"
+ (&redLel->user.token_type)->token = *(&rhs[0]->user.token);
+
+#line 5857 "rlparse.cpp"
+} break;
+case 219: {
+#line 1288 "rlparse.kl"
+ (&redLel->user.token_type)->token = *(&rhs[0]->user.token);
+
+#line 5863 "rlparse.cpp"
+} break;
+case 220: {
+#line 1289 "rlparse.kl"
+ (&redLel->user.token_type)->token = *(&rhs[0]->user.token);
+
+#line 5869 "rlparse.cpp"
+} break;
+case 221: {
+#line 1290 "rlparse.kl"
+ (&redLel->user.token_type)->token = *(&rhs[0]->user.token);
+
+#line 5875 "rlparse.cpp"
+} break;
+case 222: {
+#line 1297 "rlparse.kl"
+ (&redLel->user.token_type)->token = *(&rhs[0]->user.token);
+
+#line 5881 "rlparse.cpp"
+} break;
+case 223: {
+#line 1298 "rlparse.kl"
+ (&redLel->user.token_type)->token = *(&rhs[0]->user.token);
+
+#line 5887 "rlparse.cpp"
+} break;
+case 224: {
+#line 1299 "rlparse.kl"
+ (&redLel->user.token_type)->token = *(&rhs[0]->user.token);
+
+#line 5893 "rlparse.cpp"
+} break;
+case 225: {
+#line 1300 "rlparse.kl"
+ (&redLel->user.token_type)->token = *(&rhs[0]->user.token);
+
+#line 5899 "rlparse.cpp"
+} break;
+case 226: {
+#line 1301 "rlparse.kl"
+ (&redLel->user.token_type)->token = *(&rhs[0]->user.token);
+
+#line 5905 "rlparse.cpp"
+} break;
+}
+ #ifdef LOG_ACTIONS
+ cerr << "reduced: "
+ << Parser_prodNames[reduction]
+ << " rhsLen: " << rhsLen;
+ #endif
+ if ( action[1] == 0 )
+ redLel->retry = 0;
+ else {
+ redLel->retry += 0x10000;
+ numRetry += 1;
+ #ifdef LOG_ACTIONS
+ cerr << " retry: " << redLel;
+ #endif
+ }
+
+ #ifdef LOG_ACTIONS
+ cerr << endl;
+ #endif
+
+ if ( rhsLen == 0 ) {
+ redLel->file = lel->file;
+ redLel->line = lel->line;
+ targState = curs;
+ }
+ else {
+ redLel->child = rhs[rhsLen-1];
+ redLel->file = rhs[0]->file;
+ redLel->line = rhs[0]->line;
+ targState = rhs[0]->state;
+ }
+
+ if ( induceReject ) {
+ #ifdef LOG_ACTIONS
+ cerr << "error induced during reduction of " <<
+ Parser_lelNames[redLel->type] << endl;
+ #endif
+ redLel->state = curs;
+ redLel->next = stackTop;
+ stackTop = redLel;
+ curs = targState;
+ goto parseError;
+ }
+ else {
+ redLel->next = input;
+ input = redLel;
+ }
+ }
+
+
+ curs = targState;
+ goto again;
+
+parseError:
+ #ifdef LOG_BACKTRACK
+ cerr << "hit error" << endl;
+ #endif
+ if ( numRetry > 0 ) {
+ while ( 1 ) {
+ struct Parser_LangEl *redLel = stackTop;
+ if ( stackTop->type < 225 ) {
+ #ifdef LOG_BACKTRACK
+ cerr << "backing up over terminal: " <<
+ Parser_lelNames[stackTop->type] << endl;
+ #endif
+ stackTop = stackTop->next;
+ redLel->next = input;
+ input = redLel;
+ }
+ else {
+ #ifdef LOG_BACKTRACK
+ cerr << "backing up over non-terminal: " <<
+ Parser_lelNames[stackTop->type] << endl;
+ #endif
+ stackTop = stackTop->next;
+ struct Parser_LangEl *first = redLel->child;
+ if ( first == 0 )
+ rhsLen = 0;
+ else {
+ rhsLen = 1;
+ while ( first->next != 0 ) {
+ first = first->next;
+ rhsLen += 1;
+ }
+ first->next = stackTop;
+ stackTop = redLel->child;
+
+ struct Parser_LangEl *rhsEl = stackTop;
+ int p = rhsLen;
+ while ( p > 0 ) {
+ rhs[--p] = rhsEl;
+ rhsEl = rhsEl->next;
+ }
+ }
+ redLel->next = pool;
+ pool = redLel;
+ numNodes -= 1;
+ }
+
+ if ( redLel->retry > 0 ) {
+ #ifdef LOG_BACKTRACK
+ cerr << "found retry targ: " << redLel << endl;
+ #endif
+ numRetry -= 1;
+ #ifdef LOG_BACKTRACK
+ cerr << "found retry: " << redLel << endl;
+ #endif
+ if ( redLel->retry & 0x0000ffff )
+ curs = input->state;
+ else {
+ input->retry = redLel->retry >> 16;
+ if ( stackTop->state < 0 )
+ curs = Parser_startState;
+ else {
+ curs = Parser_targs[(int)Parser_indicies[Parser_offsets[stackTop->state] + (stackTop->type - Parser_keys[stackTop->state<<1])]];
+ }
+ }
+ goto again;
+ }
+ }
+ }
+ curs = -1;
+ errCount += 1;
+_out: {}
+#line 1385 "rlparse.kl"
+ return errCount == 0 ? 0 : -1;
+}
+
+/* Register a machine definition (or instantiation) under 'name' in the
+ * graph dictionary. If the name is already taken, an error is reported at
+ * 'loc' and the duplicate definition is ignored. */
+void Parser::tryMachineDef( InputLoc &loc, char *name,
+		JoinOrLm *joinOrLm, bool isInstance )
+{
+	GraphDictEl *entry = pd->graphDict.insert( name );
+	if ( entry == 0 ) {
+		/* Name already in the dictionary: recover by dropping the duplicate. */
+		error(loc) << "fsm \"" << name << "\" previously defined" << endl;
+		return;
+	}
+
+	/* Fresh dictionary entry: attach the definition and record where it
+	 * came from. */
+	entry->value = new VarDef( name, joinOrLm );
+	entry->isInstance = isInstance;
+	entry->loc = loc;
+
+	/* The export flag comes from the last element of exportContext. */
+	entry->value->isExport = exportContext[exportContext.length()-1];
+
+	/* Instantiations are additionally tracked on the instance list. */
+	if ( isInstance )
+		pd->instanceList.append( entry );
+}
+
+ostream &Parser::parse_error( int tokId, Token &token )
+{
+ /* Maintain the error count. */
+ gblErrorCount += 1;
+
+ cerr << token.loc.fileName << ":" << token.loc.line << ":" << token.loc.col << ": ";
+ cerr << "at token ";
+ if ( tokId < 128 )
+ cerr << "\"" << Parser_lelNames[tokId] << "\"";
+ else
+ cerr << Parser_lelNames[tokId];
+ if ( token.data != 0 )
+ cerr << " with data \"" << token.data << "\"";
+ cerr << ": ";
+
+ return cerr;
+}
+
+/* Wrap the raw token text in a Token and feed it to the parser. A negative
+ * result from parseLangEl is reported as a parse error and terminates the
+ * process with exit status 1; otherwise the result is returned. */
+int Parser::token( InputLoc &loc, int tokId, char *tokstart, int toklen )
+{
+	/* Package the location and text for parseLangEl. */
+	Token tok;
+	tok.loc = loc;
+	tok.data = tokstart;
+	tok.length = toklen;
+
+	const int status = parseLangEl( tokId, &tok );
+	if ( status < 0 ) {
+		/* No recovery at this level: print the diagnostic and bail out. */
+		parse_error(tokId, tok) << "parse error" << endl;
+		exit(1);
+	}
+	return status;
+}
diff --git a/contrib/tools/ragel5/ragel/rlparse.h b/contrib/tools/ragel5/ragel/rlparse.h
new file mode 100644
index 0000000000..957db0fd69
--- /dev/null
+++ b/contrib/tools/ragel5/ragel/rlparse.h
@@ -0,0 +1,184 @@
+/* Automatically generated by Kelbt from "rlparse.kh".
+ *
+ * Parts of this file are copied from Kelbt source covered by the GNU
+ * GPL. As a special exception, you may use the parts of this file copied
+ * from Kelbt source without restriction. The remainder is derived from
+ * "rlparse.kh" and inherits the copyright status of that file.
+ */
+
+#line 1 "rlparse.kh"
+/*
+ * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef RLPARSE_H
+#define RLPARSE_H
+
+#include <iostream>
+#include "avltree.h"
+#include "parsedata.h"
+
+struct Parser /* Parser for one named machine. Scanner::processToken creates
+ * one instance per machine name and routes that section's tokens to it
+ * through token(). */
+{
+#line 93 "rlparse.kh"
+
+
+ #line 44 "rlparse.h"
+ struct Parser_LangEl *freshEl; /* this member through curs: state emitted by the parser generator for the generated driver */
+ int freshPos;
+ struct Parser_LangEl *pool;
+ int numRetry;
+ int numNodes;
+ struct Parser_LangEl *stackTop;
+ struct Parser_LangEl *lastFinal;
+ int errCount; /* parseLangEl() returns 0 while this is 0 and -1 otherwise */
+ int curs;
+#line 96 "rlparse.kh"
+
+ void init(); /* NOTE(review): presumably resets the generated state above; callers invoke it right after construction */
+ int parseLangEl( int type, const Token *token ); /* generated driver: consume one token of the given type */
+
+ Parser(const char *fileName, char *sectionName, InputLoc &sectionLoc ) /* allocates the ParseData and seeds exportContext with a single false */
+ : sectionName(sectionName)
+ {
+ pd = new ParseData( fileName, sectionName, sectionLoc );
+ exportContext.append( false );
+ }
+
+ int token( InputLoc &loc, int tokId, char *tokstart, int toklen ); /* feed one token; prints a parse error and calls exit(1) when parseLangEl() fails */
+ void tryMachineDef( InputLoc &loc, char *name, /* add a machine definition, reporting duplicates as errors */
+ JoinOrLm *joinOrLm, bool isInstance );
+
+ /* Report an error encountered by the parser. */
+ ostream &parse_error( int tokId, Token &token );
+
+ ParseData *pd; /* created in the constructor; no matching delete is visible in this struct */
+
+ /* The name of the root section, this does not change during an include. */
+ char *sectionName;
+
+ NameRef nameRef;
+ NameRefList nameRefList;
+
+ Vector<bool> exportContext; /* the last entry becomes isExport of each newly defined machine (see tryMachineDef) */
+};
+
+#line 84 "rlparse.h"
+#define KW_Machine 128 /* first named token id; ids below 128 are used for single characters such as '=' and ';' */
+#define KW_Include 129
+#define KW_Import 130
+#define KW_Write 131
+#define TK_Word 132
+#define TK_Literal 133
+#define TK_Number 134
+#define TK_Inline 135
+#define TK_Reference 136
+#define TK_ColonEquals 137
+#define TK_EndSection 138 /* sent to the parser by Scanner::endSection() when a section closes */
+#define TK_UInt 139
+#define TK_Hex 140
+#define TK_BaseClause 141
+#define TK_DotDot 142
+#define TK_ColonGt 143
+#define TK_ColonGtGt 144
+#define TK_LtColon 145
+#define TK_Arrow 146
+#define TK_DoubleArrow 147
+#define TK_StarStar 148
+#define TK_NameSep 149
+#define TK_BarStar 150
+#define TK_DashDash 151
+#define TK_StartCond 152
+#define TK_AllCond 153
+#define TK_LeavingCond 154
+#define TK_Middle 155
+#define TK_StartGblError 156
+#define TK_AllGblError 157
+#define TK_FinalGblError 158
+#define TK_NotFinalGblError 159
+#define TK_NotStartGblError 160
+#define TK_MiddleGblError 161
+#define TK_StartLocalError 162
+#define TK_AllLocalError 163
+#define TK_FinalLocalError 164
+#define TK_NotFinalLocalError 165
+#define TK_NotStartLocalError 166
+#define TK_MiddleLocalError 167
+#define TK_StartEOF 168
+#define TK_AllEOF 169
+#define TK_FinalEOF 170
+#define TK_NotFinalEOF 171
+#define TK_NotStartEOF 172
+#define TK_MiddleEOF 173
+#define TK_StartToState 174
+#define TK_AllToState 175
+#define TK_FinalToState 176
+#define TK_NotFinalToState 177
+#define TK_NotStartToState 178
+#define TK_MiddleToState 179
+#define TK_StartFromState 180
+#define TK_AllFromState 181
+#define TK_FinalFromState 182
+#define TK_NotFinalFromState 183
+#define TK_NotStartFromState 184
+#define TK_MiddleFromState 185
+#define RE_Slash 186
+#define RE_SqOpen 187
+#define RE_SqOpenNeg 188
+#define RE_SqClose 189
+#define RE_Dot 190
+#define RE_Star 191
+#define RE_Dash 192
+#define RE_Char 193
+#define IL_WhiteSpace 194
+#define IL_Comment 195
+#define IL_Literal 196
+#define IL_Symbol 197
+#define KW_Action 198
+#define KW_AlphType 199
+#define KW_Range 200
+#define KW_GetKey 201
+#define KW_When 202
+#define KW_Eof 203
+#define KW_Err 204
+#define KW_Lerr 205
+#define KW_To 206
+#define KW_From 207
+#define KW_Export 208
+#define KW_Break 209
+#define KW_Exec 210
+#define KW_Hold 211
+#define KW_PChar 212
+#define KW_Char 213
+#define KW_Goto 214
+#define KW_Call 215
+#define KW_Ret 216
+#define KW_CurState 217
+#define KW_TargState 218
+#define KW_Entry 219
+#define KW_Next 220
+#define KW_Variable 221
+#define KW_Access 222
+#define TK_Semi 223
+#define _eof 224 /* last id in this generated list */
+
+#line 126 "rlparse.kh"
+
+#endif
diff --git a/contrib/tools/ragel5/ragel/rlscan.cpp b/contrib/tools/ragel5/ragel/rlscan.cpp
new file mode 100644
index 0000000000..47a7f02148
--- /dev/null
+++ b/contrib/tools/ragel5/ragel/rlscan.cpp
@@ -0,0 +1,4876 @@
+#line 1 "rlscan.rl"
+/*
+ * Copyright 2006-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <iostream>
+#include <fstream>
+#include <string.h>
+
+#include "ragel.h"
+#include "rlscan.h"
+
+//#define LOG_TOKENS
+
+using std::ifstream;
+using std::istream;
+using std::ostream;
+using std::cout;
+using std::cerr;
+using std::endl;
+
+enum InlineBlockType /* Kind of inline block being scanned; do_scan() starts out with CurlyDelimited. */
+{
+ CurlyDelimited, /* NOTE(review): presumably a block closed by a matching curly brace — confirm in do_scan() */
+ SemiTerminated /* NOTE(review): presumably a block that ends at a semicolon — confirm in do_scan() */
+};
+
+
+/*
+ * The Scanner for Importing
+ */
+
+#define IMP_Word 128 /* import token ids: buffered by importToken() and matched by the machine in flushImport() */
+#define IMP_Literal 129
+#define IMP_UInt 130
+#define IMP_Define 131
+
+#line 124 "rlscan.rl"
+
+
+
+#line 60 "rlscan.cpp"
+static const int inline_token_scan_start = 2; /* initial value of tok_cs in Scanner::flushImport() */
+
+static const int inline_token_scan_first_final = 2; /* generated constant; not referenced in the code visible here */
+
+static const int inline_token_scan_error = -1; /* generated constant; not referenced in the code visible here */
+
+#line 127 "rlscan.rl"
+
+void Scanner::flushImport() /* Run the generated inline_token_scan machine over
+ * the buffered import tokens token_data[0 .. cur_token). Every recognised
+ * three-token pattern is replayed to inclToParser as the four tokens
+ * "name = value ;". Tokens belonging to a match that is still incomplete
+ * when the buffer runs out are moved to the front of the buffers so a later
+ * call can finish it. */
+{
+ int *p = token_data; /* first buffered token id */
+ int *pe = token_data + cur_token; /* one past the last buffered token id */
+
+
+#line 75 "rlscan.cpp"
+ {
+ tok_cs = inline_token_scan_start;
+ tok_tokstart = 0;
+ tok_tokend = 0;
+ tok_act = 0;
+ }
+#line 134 "rlscan.rl"
+
+#line 84 "rlscan.cpp"
+ {
+ if ( p == pe )
+ goto _out;
+ switch ( tok_cs )
+ {
+tr0: /* third token did not fit: resume scanning right after the first token of the attempt */
+#line 122 "rlscan.rl"
+ {{p = (( tok_tokend))-1;}}
+ goto st2;
+tr1: /* IMP_Word '=' IMP_Literal */
+#line 108 "rlscan.rl"
+ { tok_tokend = p+1;{
+ int base = tok_tokstart - token_data; /* index of the first token of the match */
+ int nameOff = 0;
+ int litOff = 2;
+
+ directToParser( inclToParser, fileName, line, column, TK_Word,
+ token_strings[base+nameOff], token_lens[base+nameOff] );
+ directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
+ directToParser( inclToParser, fileName, line, column, TK_Literal,
+ token_strings[base+litOff], token_lens[base+litOff] );
+ directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
+ }{p = (( tok_tokend))-1;}}
+ goto st2;
+tr2: /* IMP_Word '=' IMP_UInt */
+#line 80 "rlscan.rl"
+ { tok_tokend = p+1;{
+ int base = tok_tokstart - token_data;
+ int nameOff = 0;
+ int numOff = 2;
+
+ directToParser( inclToParser, fileName, line, column, TK_Word,
+ token_strings[base+nameOff], token_lens[base+nameOff] );
+ directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
+ directToParser( inclToParser, fileName, line, column, TK_UInt,
+ token_strings[base+numOff], token_lens[base+numOff] );
+ directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
+ }{p = (( tok_tokend))-1;}}
+ goto st2;
+tr3: /* IMP_Define IMP_Word IMP_Literal */
+#line 94 "rlscan.rl"
+ { tok_tokend = p+1;{
+ int base = tok_tokstart - token_data;
+ int nameOff = 1; /* the name follows the define token */
+ int litOff = 2;
+
+ directToParser( inclToParser, fileName, line, column, TK_Word,
+ token_strings[base+nameOff], token_lens[base+nameOff] );
+ directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
+ directToParser( inclToParser, fileName, line, column, TK_Literal,
+ token_strings[base+litOff], token_lens[base+litOff] );
+ directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
+ }{p = (( tok_tokend))-1;}}
+ goto st2;
+tr4: /* IMP_Define IMP_Word IMP_UInt */
+#line 66 "rlscan.rl"
+ { tok_tokend = p+1;{
+ int base = tok_tokstart - token_data;
+ int nameOff = 1;
+ int numOff = 2;
+
+ directToParser( inclToParser, fileName, line, column, TK_Word,
+ token_strings[base+nameOff], token_lens[base+nameOff] );
+ directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
+ directToParser( inclToParser, fileName, line, column, TK_UInt,
+ token_strings[base+numOff], token_lens[base+numOff] );
+ directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
+ }{p = (( tok_tokend))-1;}}
+ goto st2;
+tr5: /* any other token: consume it and emit nothing */
+#line 122 "rlscan.rl"
+ { tok_tokend = p+1;{p = (( tok_tokend))-1;}}
+ goto st2;
+tr8: /* second token did not fit: drop the first token and rescan from the current one */
+#line 122 "rlscan.rl"
+ { tok_tokend = p;{p = (( tok_tokend))-1;}}
+ goto st2;
+st2:
+#line 1 "rlscan.rl"
+ { tok_tokstart = 0;}
+ if ( ++p == pe )
+ goto _out2;
+case 2:
+#line 1 "rlscan.rl"
+ { tok_tokstart = p;}
+#line 170 "rlscan.cpp"
+ switch( (*p) ) {
+ case 128: goto tr6; /* IMP_Word */
+ case 131: goto tr7; /* IMP_Define */
+ }
+ goto tr5;
+tr6:
+#line 1 "rlscan.rl"
+ { tok_tokend = p+1;}
+ goto st3;
+st3:
+ if ( ++p == pe )
+ goto _out3;
+case 3:
+#line 184 "rlscan.cpp"
+ if ( (*p) == 61 ) /* '=' */
+ goto st0;
+ goto tr8;
+st0:
+ if ( ++p == pe )
+ goto _out0;
+case 0:
+ switch( (*p) ) {
+ case 129: goto tr1; /* IMP_Literal */
+ case 130: goto tr2; /* IMP_UInt */
+ }
+ goto tr0;
+tr7:
+#line 1 "rlscan.rl"
+ { tok_tokend = p+1;}
+ goto st4;
+st4:
+ if ( ++p == pe )
+ goto _out4;
+case 4:
+#line 205 "rlscan.cpp"
+ if ( (*p) == 128 ) /* IMP_Word */
+ goto st1;
+ goto tr8;
+st1:
+ if ( ++p == pe )
+ goto _out1;
+case 1:
+ switch( (*p) ) {
+ case 129: goto tr3; /* IMP_Literal */
+ case 130: goto tr4; /* IMP_UInt */
+ }
+ goto tr0;
+ }
+ _out2: tok_cs = 2; goto _out;
+ _out3: tok_cs = 3; goto _out;
+ _out0: tok_cs = 0; goto _out;
+ _out4: tok_cs = 4; goto _out;
+ _out1: tok_cs = 1; goto _out;
+
+ _out: {}
+ }
+#line 135 "rlscan.rl"
+
+ if ( tok_tokstart == 0 ) /* no match in progress: everything buffered was consumed */
+ cur_token = 0;
+ else {
+ cur_token = pe - tok_tokstart; /* tokens of the unfinished match */
+ int ts_offset = tok_tokstart - token_data;
+ memmove( token_data, token_data+ts_offset, cur_token*sizeof(token_data[0]) ); /* the three parallel arrays are shifted together */
+ memmove( token_strings, token_strings+ts_offset, cur_token*sizeof(token_strings[0]) );
+ memmove( token_lens, token_lens+ts_offset, cur_token*sizeof(token_lens[0]) );
+ }
+}
+
+/*
+ * Hand a single token to `toParser`. The file name, line and column are
+ * packed into an InputLoc first. When LOG_TOKENS is defined the token is
+ * also traced on cerr before it is sent.
+ */
+void Scanner::directToParser( Parser *toParser, const char *tokFileName, int tokLine,
+        int tokColumn, int type, char *tokdata, int toklen )
+{
+    #ifdef LOG_TOKENS
+    cerr << "scanner:" << tokLine << ":" << tokColumn <<
+            ": sending token to the parser " << Parser_lelNames[type]
+            << " " << toklen;
+    if ( tokdata != 0 ) {
+        cerr << " " << tokdata;
+    }
+    cerr << endl;
+    #endif
+
+    InputLoc where;
+    where.fileName = tokFileName;
+    where.line = tokLine;
+    where.col = tokColumn;
+
+    toParser->token( where, type, tokdata, toklen );
+}
+
+/*
+ * Append one token to the import buffers. A full buffer is flushed first.
+ * When `start` is non-null the text [start, end) is copied into a fresh
+ * NUL-terminated array; otherwise a null string of length 0 is stored.
+ */
+void Scanner::importToken( int token, char *start, char *end )
+{
+    /* Make room before writing to slot cur_token. */
+    if ( cur_token == max_tokens ) {
+        flushImport();
+    }
+
+    char *textCopy = 0;
+    int textLen = 0;
+    if ( start != 0 ) {
+        textLen = end - start;
+        textCopy = new char[textLen+1];
+        memcpy( textCopy, start, textLen );
+        textCopy[textLen] = 0;
+    }
+
+    token_data[cur_token] = token;
+    token_lens[cur_token] = textLen;
+    token_strings[cur_token] = textCopy;
+    cur_token += 1;
+}
+
+/*
+ * Pass a host-code token through. While machines are being imported the
+ * token is recorded in the import buffers first; the parameterless pass()
+ * then runs in every case.
+ */
+void Scanner::pass( int token, char *start, char *end )
+{
+    if ( importMachines ) {
+        importToken( token, start, end );
+    }
+
+    pass();
+}
+
+/*
+ * Advance the column counter over the current token and, when appropriate,
+ * copy the token text to the output through xmlEscapeHost().
+ */
+void Scanner::pass()
+{
+    updateCol();
+
+    /* Host text is written only at the bottom of the include stack (the
+     * source file listed on the command line) and only when neither a
+     * machine spec nor a machine name has been set. */
+    if ( includeDepth != 0 || machineSpec != 0 || machineName != 0 )
+        return;
+
+    xmlEscapeHost( output, tokstart, tokend-tokstart );
+}
+
+/*
+ * The scanner for processing sections, includes, imports, etc.
+ */
+
+
+#line 303 "rlscan.cpp"
+static const int section_parse_start = 10; /* state that Scanner::init() stores in cs */
+
+static const int section_parse_first_final = 10; /* generated constant; not referenced in the code visible here */
+
+static const int section_parse_error = 0; /* matches the value stored in cs at label _out0 of processToken() */
+
+#line 213 "rlscan.rl"
+
+
+
+void Scanner::init( ) /* Put the section-statement machine driven by processToken() into its start state. */
+{
+
+#line 317 "rlscan.cpp"
+ {
+ cs = section_parse_start; /* cs is not declared locally here, unlike the local cs in do_scan() */
+ }
+#line 219 "rlscan.rl"
+}
+
+/*
+ * Tell whether tokens of the current section should be acted on: the
+ * section must not be ignored and a parser must be selected. A missing
+ * parser is reported once; parserExistsError suppresses repeats until it
+ * is cleared again.
+ */
+bool Scanner::active()
+{
+    if ( ignoreSection )
+        return false;
+
+    if ( parser != 0 )
+        return true;
+
+    /* No parser: complain the first time only. */
+    if ( !parserExistsError ) {
+        scan_error() << "there is no previous specification name" << endl;
+        parserExistsError = true;
+    }
+
+    return false;
+}
+
+/*
+ * Count a scanner error and return cerr, already primed with the
+ * "file:line:column: " prefix, for the caller to append its message to.
+ */
+ostream &Scanner::scan_error()
+{
+    /* Maintain the error count. */
+    ++gblErrorCount;
+
+    return cerr << fileName << ":" << line << ":" << column << ": ";
+}
+
+/*
+ * Return true when the include stack already holds an entry with the same
+ * file name and the same section name as the include being requested.
+ */
+bool Scanner::recursiveInclude(const char *inclFileName, char *inclSectionName )
+{
+    for ( IncludeStack::Iter item = includeStack; item.lte(); item++ ) {
+        /* Both the file and the section have to match. */
+        if ( strcmp( item->fileName, inclFileName ) != 0 )
+            continue;
+        if ( strcmp( item->sectionName, inclSectionName ) != 0 )
+            continue;
+        return true;
+    }
+
+    return false;
+}
+
+/*
+ * Advance `column` to the end of the current token. The distance is
+ * measured from the last recorded newline when there is one, otherwise
+ * from the token start. The newline marker is cleared afterwards.
+ */
+void Scanner::updateCol()
+{
+    char *origin = ( lastnl != 0 ) ? lastnl : tokstart;
+
+    column += tokend - origin;
+    lastnl = 0;
+}
+
+#line 442 "rlscan.rl"
+
+
+/* Send a token whose text is the single character `c`. */
+void Scanner::token( int type, char c )
+{
+    char *first = &c;
+    token( type, first, first + 1 );
+}
+
+/* Send a token that carries no text: both text pointers are null. */
+void Scanner::token( int type )
+{
+    char *noText = 0;
+    token( type, noText, noText );
+}
+
+/*
+ * Send a token with optional text. A non-null `start` means the range
+ * [start, end) is duplicated into a new NUL-terminated array that is given
+ * to processToken() together with its length; a null `start` sends a null
+ * text pointer and length 0.
+ */
+void Scanner::token( int type, char *start, char *end )
+{
+    if ( start == 0 ) {
+        processToken( type, 0, 0 );
+        return;
+    }
+
+    const int textLen = end - start;
+    char *textCopy = new char[textLen+1];
+    memcpy( textCopy, start, textLen );
+    textCopy[textLen] = 0;
+
+    processToken( type, textCopy, textLen );
+}
+
+void Scanner::processToken( int type, char *tokdata, int toklen ) /* Feed one
+ * token id through the generated section_parse machine. Statements that
+ * start with KW_Machine, KW_Include, KW_Import or KW_Write are handled
+ * inside this function; any other token seen in the start state is
+ * forwarded to the active parser. tokdata is stored or forwarded as given
+ * and is not freed here. Afterwards the column is advanced and the token id
+ * is remembered in lastToken. */
+{
+ int *p = &type; /* the machine input is exactly one element: the token id */
+ int *pe = &type + 1;
+
+
+#line 403 "rlscan.cpp"
+ {
+ if ( p == pe )
+ goto _out;
+ switch ( cs )
+ {
+tr2: /* KW_Machine TK_Word ';' : select the parser for this machine name */
+#line 289 "rlscan.rl"
+ {
+ /* Assign a name to the machine. */
+ char *machine = word;
+
+ if ( !importMachines && inclSectionTarg == 0 ) {
+ ignoreSection = false;
+
+ ParserDictEl *pdEl = parserDict.find( machine );
+ if ( pdEl == 0 ) { /* first use of this name: create and register its parser */
+ pdEl = new ParserDictEl( machine );
+ pdEl->value = new Parser( fileName, machine, sectionLoc );
+ pdEl->value->init();
+ parserDict.insert( pdEl );
+ }
+
+ parser = pdEl->value;
+ }
+ else if ( !importMachines && strcmp( inclSectionTarg, machine ) == 0 ) {
+ /* found include target */
+ ignoreSection = false;
+ parser = inclToParser;
+ }
+ else {
+ /* ignoring section */
+ ignoreSection = true;
+ parser = 0;
+ }
+ }
+ goto st10;
+tr6: /* KW_Include followed by a TK_Word, a TK_Literal, or both in that order, then ';' */
+#line 323 "rlscan.rl"
+ {
+ if ( active() ) {
+ char *inclSectionName = word;
+ const char *inclFileName = 0;
+
+ /* Implement defaults for the input file and section name. */
+ if ( inclSectionName == 0 )
+ inclSectionName = parser->sectionName;
+
+ if ( lit != 0 )
+ inclFileName = prepareFileName( lit, lit_len );
+ else
+ inclFileName = fileName;
+
+ /* Check for a recursive include structure. Add the current file/section
+ * name then check if what we are including is already in the stack. */
+ includeStack.append( IncludeStackItem( fileName, parser->sectionName ) );
+
+ if ( recursiveInclude( inclFileName, inclSectionName ) )
+ scan_error() << "include: this is a recursive include operation" << endl;
+ else {
+ /* Open the input file for reading. */
+ ifstream *inFile = new ifstream( inclFileName );
+ if ( ! inFile->is_open() ) {
+ scan_error() << "include: could not open " << /* NOTE(review): the nested scan below still runs on the unopened stream */
+ inclFileName << " for reading" << endl;
+ }
+
+ Scanner scanner( inclFileName, *inFile, output, parser,
+ inclSectionName, includeDepth+1, false );
+ scanner.do_scan( );
+ delete inFile;
+ }
+
+ /* Remove the last element (len-1) */
+ includeStack.remove( -1 );
+ }
+ }
+ goto st10;
+tr10: /* KW_Import TK_Literal ';' */
+#line 372 "rlscan.rl"
+ {
+ if ( active() ) {
+ char *importFileName = prepareFileName( lit, lit_len );
+
+ /* Open the input file for reading. */
+ ifstream *inFile = new ifstream( importFileName );
+ if ( ! inFile->is_open() ) {
+ scan_error() << "import: could not open " << /* NOTE(review): as with include, scanning continues after this error */
+ importFileName << " for reading" << endl;
+ }
+
+ Scanner scanner( importFileName, *inFile, output, parser,
+ 0, includeDepth+1, true );
+ scanner.do_scan( );
+ scanner.importToken( 0, 0, 0 ); /* appends a token with id 0 and no text; presumably so a pending match gets resolved — confirm */
+ scanner.flushImport();
+ delete inFile;
+ }
+ }
+ goto st10;
+tr13: /* ';' that ends a write statement */
+#line 414 "rlscan.rl"
+ {
+ if ( active() && machineSpec == 0 && machineName == 0 )
+ output << "</write>\n";
+ }
+ goto st10;
+tr14: /* any other token in the start state */
+#line 425 "rlscan.rl"
+ {
+ /* Send the token off to the parser. */
+ if ( active() )
+ directToParser( parser, fileName, line, column, type, tokdata, toklen );
+ }
+ goto st10;
+st10:
+ if ( ++p == pe )
+ goto _out10;
+case 10:
+#line 522 "rlscan.cpp"
+ switch( (*p) ) {
+ case 128: goto st1; /* KW_Machine */
+ case 129: goto st3; /* KW_Include */
+ case 130: goto st6; /* KW_Import */
+ case 131: goto tr18; /* KW_Write */
+ }
+ goto tr14;
+st1:
+ if ( ++p == pe )
+ goto _out1;
+case 1:
+ if ( (*p) == 132 ) /* TK_Word */
+ goto tr1;
+ goto tr0;
+tr0:
+#line 283 "rlscan.rl"
+ { scan_error() << "bad machine statement" << endl; }
+ goto st0;
+tr3:
+#line 284 "rlscan.rl"
+ { scan_error() << "bad include statement" << endl; }
+ goto st0;
+tr8:
+#line 285 "rlscan.rl"
+ { scan_error() << "bad import statement" << endl; }
+ goto st0;
+tr11:
+#line 286 "rlscan.rl"
+ { scan_error() << "bad write statement" << endl; }
+ goto st0;
+#line 553 "rlscan.cpp"
+st0: /* NOTE(review): there is no case 0 label, so a later call with cs == 0 skips the switch body and reaches _out10 — confirm this restart is intended */
+ goto _out0;
+tr1:
+#line 280 "rlscan.rl"
+ { word = tokdata; word_len = toklen; }
+ goto st2;
+st2:
+ if ( ++p == pe )
+ goto _out2;
+case 2:
+#line 564 "rlscan.cpp"
+ if ( (*p) == 59 ) /* ';' */
+ goto tr2;
+ goto tr0;
+st3:
+ if ( ++p == pe )
+ goto _out3;
+case 3:
+ switch( (*p) ) {
+ case 132: goto tr4; /* TK_Word */
+ case 133: goto tr5; /* TK_Literal */
+ }
+ goto tr3;
+tr4:
+#line 279 "rlscan.rl"
+ { word = lit = 0; word_len = lit_len = 0; }
+#line 280 "rlscan.rl"
+ { word = tokdata; word_len = toklen; }
+ goto st4;
+st4:
+ if ( ++p == pe )
+ goto _out4;
+case 4:
+#line 587 "rlscan.cpp"
+ switch( (*p) ) {
+ case 59: goto tr6; /* ';' */
+ case 133: goto tr7; /* TK_Literal */
+ }
+ goto tr3;
+tr5:
+#line 279 "rlscan.rl"
+ { word = lit = 0; word_len = lit_len = 0; }
+#line 281 "rlscan.rl"
+ { lit = tokdata; lit_len = toklen; }
+ goto st5;
+tr7:
+#line 281 "rlscan.rl"
+ { lit = tokdata; lit_len = toklen; }
+ goto st5;
+st5:
+ if ( ++p == pe )
+ goto _out5;
+case 5:
+#line 607 "rlscan.cpp"
+ if ( (*p) == 59 ) /* ';' */
+ goto tr6;
+ goto tr3;
+st6:
+ if ( ++p == pe )
+ goto _out6;
+case 6:
+ if ( (*p) == 133 ) /* TK_Literal */
+ goto tr9;
+ goto tr8;
+tr9:
+#line 281 "rlscan.rl"
+ { lit = tokdata; lit_len = toklen; }
+ goto st7;
+st7:
+ if ( ++p == pe )
+ goto _out7;
+case 7:
+#line 626 "rlscan.cpp"
+ if ( (*p) == 59 ) /* ';' */
+ goto tr10;
+ goto tr8;
+tr18: /* KW_Write: open the write element in the output */
+#line 397 "rlscan.rl"
+ {
+ if ( active() && machineSpec == 0 && machineName == 0 ) {
+ output << "<write"
+ " def_name=\"" << parser->sectionName << "\""
+ " line=\"" << line << "\""
+ " col=\"" << column << "\""
+ ">";
+ }
+ }
+ goto st8;
+st8:
+ if ( ++p == pe )
+ goto _out8;
+case 8:
+#line 646 "rlscan.cpp"
+ if ( (*p) == 132 ) /* TK_Word */
+ goto tr12;
+ goto tr11;
+tr12: /* TK_Word argument of a write statement */
+#line 408 "rlscan.rl"
+ {
+ if ( active() && machineSpec == 0 && machineName == 0 )
+ output << "<arg>" << tokdata << "</arg>"; /* the text is written as-is, without going through an escaping helper */
+ }
+ goto st9;
+st9:
+ if ( ++p == pe )
+ goto _out9;
+case 9:
+#line 661 "rlscan.cpp"
+ switch( (*p) ) {
+ case 59: goto tr13; /* ';' */
+ case 132: goto tr12; /* TK_Word */
+ }
+ goto tr11;
+ }
+ _out10: cs = 10; goto _out;
+ _out1: cs = 1; goto _out;
+ _out0: cs = 0; goto _out;
+ _out2: cs = 2; goto _out;
+ _out3: cs = 3; goto _out;
+ _out4: cs = 4; goto _out;
+ _out5: cs = 5; goto _out;
+ _out6: cs = 6; goto _out;
+ _out7: cs = 7; goto _out;
+ _out8: cs = 8; goto _out;
+ _out9: cs = 9; goto _out;
+
+ _out: {}
+ }
+#line 476 "rlscan.rl"
+
+
+ updateCol();
+
+ /* Record the last token for use in controlling the scan of subsequent
+ * tokens. */
+ lastToken = type;
+}
+
+/*
+ * Begin a machine section: re-arm the "no parser" diagnostic, close the
+ * open host element in the output when host text is being written, and
+ * remember where the section starts.
+ */
+void Scanner::startSection( )
+{
+    parserExistsError = false;
+
+    /* Host markup is produced only at include depth 0 and only when
+     * neither a machine spec nor a machine name has been set. */
+    const bool writingHost =
+            includeDepth == 0 && machineSpec == 0 && machineName == 0;
+    if ( writingHost ) {
+        output << "</host>\n";
+    }
+
+    sectionLoc.col = 0;
+    sectionLoc.line = line;
+    sectionLoc.fileName = fileName;
+}
+
+void Scanner::endSection( ) /* Finish a machine section: report a statement
+ * that was left incomplete, send TK_EndSection to the active parser and,
+ * at include depth 0 when host text is being written, reopen the host
+ * element in the output. */
+{
+ /* Execute the eof actions for the section parser. */
+
+#line 710 "rlscan.cpp"
+ {
+ switch ( cs ) { /* a state other than the start state means the section ended in the middle of a statement */
+ case 1:
+ case 2:
+#line 283 "rlscan.rl"
+ { scan_error() << "bad machine statement" << endl; }
+ break;
+ case 3:
+ case 4:
+ case 5:
+#line 284 "rlscan.rl"
+ { scan_error() << "bad include statement" << endl; }
+ break;
+ case 6:
+ case 7:
+#line 285 "rlscan.rl"
+ { scan_error() << "bad import statement" << endl; }
+ break;
+ case 8:
+ case 9:
+#line 286 "rlscan.rl"
+ { scan_error() << "bad write statement" << endl; }
+ break;
+#line 734 "rlscan.cpp"
+ }
+ }
+
+#line 505 "rlscan.rl"
+
+
+ /* Close off the section with the parser. */
+ if ( active() ) {
+ InputLoc loc;
+ loc.fileName = fileName;
+ loc.line = line;
+ loc.col = 0; /* the end-of-section token is reported at column 0 of the current line */
+
+ parser->token( loc, TK_EndSection, 0, 0 );
+ }
+
+ if ( includeDepth == 0 ) {
+ if ( machineSpec == 0 && machineName == 0 ) {
+ /* The end section may include a newline on the end, so
+ * we use the last line, which will count the newline. */
+ output << "<host line=\"" << line << "\">"; /* counterpart of the closing host tag written by startSection() */
+ }
+ }
+}
+
+#line 917 "rlscan.rl"
+
+
+
+#line 764 "rlscan.cpp"
+static const int rlscan_start = 23; /* initial value of the local cs in Scanner::do_scan() */
+
+static const int rlscan_first_final = 23; /* generated constant; not referenced in the code visible here */
+
+static const int rlscan_error = 0; /* generated constant; not referenced in the code visible here */
+
+#line 920 "rlscan.rl"
+
+void Scanner::do_scan()
+{
+ int bufsize = 8;
+ char *buf = new char[bufsize];
+ const char last_char = 0;
+ int cs, act, have = 0;
+ int top, stack[1];
+ int curly_count = 0;
+ bool execute = true;
+ bool singleLineSpec = false;
+ InlineBlockType inlineBlockType = CurlyDelimited;
+
+ /* Init the section parser and the character scanner. */
+ init();
+
+#line 788 "rlscan.cpp"
+ {
+ cs = rlscan_start;
+ top = 0;
+ tokstart = 0;
+ tokend = 0;
+ act = 0;
+ }
+#line 936 "rlscan.rl"
+
+ while ( execute ) {
+ char *p = buf + have;
+ int space = bufsize - have;
+
+ if ( space == 0 ) {
+ /* We filled up the buffer trying to scan a token. Grow it. */
+ bufsize = bufsize * 2;
+ char *newbuf = new char[bufsize];
+
+ /* Recompute p and space. */
+ p = newbuf + have;
+ space = bufsize - have;
+
+ /* Patch up pointers possibly in use. */
+ if ( tokstart != 0 )
+ tokstart = newbuf + ( tokstart - buf );
+ tokend = newbuf + ( tokend - buf );
+
+ /* Copy the new buffer in. */
+ memcpy( newbuf, buf, have );
+ delete[] buf;
+ buf = newbuf;
+ }
+
+ input.read( p, space );
+ int len = input.gcount();
+
+ /* If we see eof then append the EOF char. */
+ if ( len == 0 ) {
+ p[0] = last_char, len = 1;
+ execute = false;
+ }
+
+ char *pe = p + len;
+
+#line 833 "rlscan.cpp"
+ {
+ if ( p == pe )
+ goto _out;
+ goto _resume;
+
+_again:
+ switch ( cs ) {
+ case 23: goto st23;
+ case 24: goto st24;
+ case 25: goto st25;
+ case 1: goto st1;
+ case 2: goto st2;
+ case 26: goto st26;
+ case 27: goto st27;
+ case 28: goto st28;
+ case 3: goto st3;
+ case 4: goto st4;
+ case 29: goto st29;
+ case 5: goto st5;
+ case 6: goto st6;
+ case 7: goto st7;
+ case 30: goto st30;
+ case 31: goto st31;
+ case 32: goto st32;
+ case 33: goto st33;
+ case 34: goto st34;
+ case 35: goto st35;
+ case 36: goto st36;
+ case 37: goto st37;
+ case 38: goto st38;
+ case 39: goto st39;
+ case 8: goto st8;
+ case 9: goto st9;
+ case 40: goto st40;
+ case 10: goto st10;
+ case 11: goto st11;
+ case 41: goto st41;
+ case 12: goto st12;
+ case 13: goto st13;
+ case 14: goto st14;
+ case 42: goto st42;
+ case 43: goto st43;
+ case 15: goto st15;
+ case 44: goto st44;
+ case 45: goto st45;
+ case 46: goto st46;
+ case 47: goto st47;
+ case 48: goto st48;
+ case 49: goto st49;
+ case 50: goto st50;
+ case 51: goto st51;
+ case 52: goto st52;
+ case 53: goto st53;
+ case 54: goto st54;
+ case 55: goto st55;
+ case 56: goto st56;
+ case 57: goto st57;
+ case 58: goto st58;
+ case 59: goto st59;
+ case 60: goto st60;
+ case 61: goto st61;
+ case 62: goto st62;
+ case 63: goto st63;
+ case 64: goto st64;
+ case 65: goto st65;
+ case 66: goto st66;
+ case 67: goto st67;
+ case 68: goto st68;
+ case 69: goto st69;
+ case 70: goto st70;
+ case 71: goto st71;
+ case 72: goto st72;
+ case 73: goto st73;
+ case 74: goto st74;
+ case 75: goto st75;
+ case 76: goto st76;
+ case 77: goto st77;
+ case 78: goto st78;
+ case 79: goto st79;
+ case 80: goto st80;
+ case 81: goto st81;
+ case 82: goto st82;
+ case 83: goto st83;
+ case 84: goto st84;
+ case 85: goto st85;
+ case 0: goto st0;
+ case 86: goto st86;
+ case 87: goto st87;
+ case 88: goto st88;
+ case 89: goto st89;
+ case 90: goto st90;
+ case 16: goto st16;
+ case 91: goto st91;
+ case 17: goto st17;
+ case 92: goto st92;
+ case 18: goto st18;
+ case 93: goto st93;
+ case 94: goto st94;
+ case 95: goto st95;
+ case 19: goto st19;
+ case 20: goto st20;
+ case 96: goto st96;
+ case 97: goto st97;
+ case 98: goto st98;
+ case 99: goto st99;
+ case 100: goto st100;
+ case 21: goto st21;
+ case 101: goto st101;
+ case 102: goto st102;
+ case 103: goto st103;
+ case 104: goto st104;
+ case 105: goto st105;
+ case 106: goto st106;
+ case 107: goto st107;
+ case 108: goto st108;
+ case 109: goto st109;
+ case 110: goto st110;
+ case 111: goto st111;
+ case 112: goto st112;
+ case 113: goto st113;
+ case 114: goto st114;
+ case 115: goto st115;
+ case 116: goto st116;
+ case 117: goto st117;
+ case 118: goto st118;
+ case 119: goto st119;
+ case 120: goto st120;
+ case 121: goto st121;
+ case 122: goto st122;
+ case 123: goto st123;
+ case 124: goto st124;
+ case 125: goto st125;
+ case 126: goto st126;
+ case 127: goto st127;
+ case 128: goto st128;
+ case 129: goto st129;
+ case 130: goto st130;
+ case 131: goto st131;
+ case 132: goto st132;
+ case 133: goto st133;
+ case 134: goto st134;
+ case 135: goto st135;
+ case 136: goto st136;
+ case 137: goto st137;
+ case 138: goto st138;
+ case 139: goto st139;
+ case 140: goto st140;
+ case 141: goto st141;
+ case 142: goto st142;
+ case 143: goto st143;
+ case 144: goto st144;
+ case 145: goto st145;
+ case 146: goto st146;
+ case 147: goto st147;
+ case 148: goto st148;
+ case 149: goto st149;
+ case 150: goto st150;
+ case 151: goto st151;
+ case 152: goto st152;
+ case 153: goto st153;
+ case 154: goto st154;
+ case 155: goto st155;
+ case 156: goto st156;
+ case 157: goto st157;
+ case 158: goto st158;
+ case 159: goto st159;
+ case 160: goto st160;
+ case 161: goto st161;
+ case 162: goto st162;
+ case 163: goto st163;
+ case 164: goto st164;
+ case 165: goto st165;
+ case 166: goto st166;
+ case 167: goto st167;
+ case 168: goto st168;
+ case 169: goto st169;
+ case 170: goto st170;
+ case 171: goto st171;
+ case 172: goto st172;
+ case 173: goto st173;
+ case 174: goto st174;
+ case 22: goto st22;
+ default: break;
+ }
+
+ if ( ++p == pe )
+ goto _out;
+_resume:
+ switch ( cs )
+ {
+tr2:
+#line 899 "rlscan.rl"
+ {tokend = p+1;{ pass( IMP_Literal, tokstart, tokend ); }{p = ((tokend))-1;}}
+ goto st23;
+tr10:
+#line 898 "rlscan.rl"
+ {tokend = p+1;{ pass(); }{p = ((tokend))-1;}}
+ goto st23;
+tr12:
+#line 532 "rlscan.rl"
+ {
+ lastnl = p;
+ column = 0;
+ line++;
+ }
+#line 898 "rlscan.rl"
+ {tokend = p+1;{ pass(); }{p = ((tokend))-1;}}
+ goto st23;
+tr41:
+#line 915 "rlscan.rl"
+ {tokend = p+1;{ pass( *tokstart, 0, 0 ); }{p = ((tokend))-1;}}
+ goto st23;
+tr42:
+#line 914 "rlscan.rl"
+ {tokend = p+1;{p = ((tokend))-1;}}
+ goto st23;
+tr52:
+#line 913 "rlscan.rl"
+ {tokend = p;{ pass(); }{p = ((tokend))-1;}}
+ goto st23;
+tr53:
+#line 915 "rlscan.rl"
+ {tokend = p;{ pass( *tokstart, 0, 0 ); }{p = ((tokend))-1;}}
+ goto st23;
+tr55:
+#line 907 "rlscan.rl"
+ {tokend = p;{
+ updateCol();
+ singleLineSpec = true;
+ startSection();
+ {{p = ((tokend))-1;}{goto st88;}}
+ }{p = ((tokend))-1;}}
+ goto st23;
+tr56:
+#line 901 "rlscan.rl"
+ {tokend = p+1;{
+ updateCol();
+ singleLineSpec = false;
+ startSection();
+ {{p = ((tokend))-1;}{goto st88;}}
+ }{p = ((tokend))-1;}}
+ goto st23;
+tr57:
+#line 897 "rlscan.rl"
+ {tokend = p;{ pass( IMP_UInt, tokstart, tokend ); }{p = ((tokend))-1;}}
+ goto st23;
+tr58:
+#line 1 "rlscan.rl"
+ { switch( act ) {
+ case 137:
+ { pass( IMP_Define, 0, 0 ); }
+ break;
+ case 138:
+ { pass( IMP_Word, tokstart, tokend ); }
+ break;
+ default: break;
+ }
+ {p = ((tokend))-1;}}
+ goto st23;
+tr59:
+#line 896 "rlscan.rl"
+ {tokend = p;{ pass( IMP_Word, tokstart, tokend ); }{p = ((tokend))-1;}}
+ goto st23;
+st23:
+#line 1 "rlscan.rl"
+ {tokstart = 0;}
+ if ( ++p == pe )
+ goto _out23;
+case 23:
+#line 1 "rlscan.rl"
+ {tokstart = p;}
+#line 1105 "rlscan.cpp"
+ switch( (*p) ) {
+ case 0: goto tr42;
+ case 9: goto st24;
+ case 10: goto tr44;
+ case 32: goto st24;
+ case 34: goto tr45;
+ case 37: goto st26;
+ case 39: goto tr47;
+ case 47: goto tr48;
+ case 95: goto tr50;
+ case 100: goto st32;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st30;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr50;
+ } else
+ goto tr50;
+ goto tr41;
+tr44:
+#line 532 "rlscan.rl"
+ {
+ lastnl = p;
+ column = 0;
+ line++;
+ }
+ goto st24;
+st24:
+ if ( ++p == pe )
+ goto _out24;
+case 24:
+#line 1139 "rlscan.cpp"
+ switch( (*p) ) {
+ case 9: goto st24;
+ case 10: goto tr44;
+ case 32: goto st24;
+ }
+ goto tr52;
+tr45:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+ goto st25;
+st25:
+ if ( ++p == pe )
+ goto _out25;
+case 25:
+#line 1154 "rlscan.cpp"
+ switch( (*p) ) {
+ case 10: goto tr1;
+ case 34: goto tr2;
+ case 92: goto st2;
+ }
+ goto st1;
+tr1:
+#line 532 "rlscan.rl"
+ {
+ lastnl = p;
+ column = 0;
+ line++;
+ }
+ goto st1;
+st1:
+ if ( ++p == pe )
+ goto _out1;
+case 1:
+#line 1173 "rlscan.cpp"
+ switch( (*p) ) {
+ case 10: goto tr1;
+ case 34: goto tr2;
+ case 92: goto st2;
+ }
+ goto st1;
+st2:
+ if ( ++p == pe )
+ goto _out2;
+case 2:
+ if ( (*p) == 10 )
+ goto tr1;
+ goto st1;
+st26:
+ if ( ++p == pe )
+ goto _out26;
+case 26:
+ if ( (*p) == 37 )
+ goto st27;
+ goto tr53;
+st27:
+ if ( ++p == pe )
+ goto _out27;
+case 27:
+ if ( (*p) == 123 )
+ goto tr56;
+ goto tr55;
+tr47:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+ goto st28;
+st28:
+ if ( ++p == pe )
+ goto _out28;
+case 28:
+#line 1209 "rlscan.cpp"
+ switch( (*p) ) {
+ case 10: goto tr5;
+ case 39: goto tr2;
+ case 92: goto st4;
+ }
+ goto st3;
+tr5:
+#line 532 "rlscan.rl"
+ {
+ lastnl = p;
+ column = 0;
+ line++;
+ }
+ goto st3;
+st3:
+ if ( ++p == pe )
+ goto _out3;
+case 3:
+#line 1228 "rlscan.cpp"
+ switch( (*p) ) {
+ case 10: goto tr5;
+ case 39: goto tr2;
+ case 92: goto st4;
+ }
+ goto st3;
+st4:
+ if ( ++p == pe )
+ goto _out4;
+case 4:
+ if ( (*p) == 10 )
+ goto tr5;
+ goto st3;
+tr48:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+ goto st29;
+st29:
+ if ( ++p == pe )
+ goto _out29;
+case 29:
+#line 1250 "rlscan.cpp"
+ switch( (*p) ) {
+ case 42: goto st5;
+ case 47: goto st7;
+ }
+ goto tr53;
+tr8:
+#line 532 "rlscan.rl"
+ {
+ lastnl = p;
+ column = 0;
+ line++;
+ }
+ goto st5;
+st5:
+ if ( ++p == pe )
+ goto _out5;
+case 5:
+#line 1268 "rlscan.cpp"
+ switch( (*p) ) {
+ case 10: goto tr8;
+ case 42: goto st6;
+ }
+ goto st5;
+st6:
+ if ( ++p == pe )
+ goto _out6;
+case 6:
+ switch( (*p) ) {
+ case 10: goto tr8;
+ case 42: goto st6;
+ case 47: goto tr10;
+ }
+ goto st5;
+st7:
+ if ( ++p == pe )
+ goto _out7;
+case 7:
+ if ( (*p) == 10 )
+ goto tr12;
+ goto st7;
+st30:
+ if ( ++p == pe )
+ goto _out30;
+case 30:
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st30;
+ goto tr57;
+tr50:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 896 "rlscan.rl"
+ {act = 138;}
+ goto st31;
+tr64:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 895 "rlscan.rl"
+ {act = 137;}
+ goto st31;
+st31:
+ if ( ++p == pe )
+ goto _out31;
+case 31:
+#line 1314 "rlscan.cpp"
+ if ( (*p) == 95 )
+ goto tr50;
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr50;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr50;
+ } else
+ goto tr50;
+ goto tr58;
+st32:
+ if ( ++p == pe )
+ goto _out32;
+case 32:
+ switch( (*p) ) {
+ case 95: goto tr50;
+ case 101: goto st33;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr50;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr50;
+ } else
+ goto tr50;
+ goto tr59;
+st33:
+ if ( ++p == pe )
+ goto _out33;
+case 33:
+ switch( (*p) ) {
+ case 95: goto tr50;
+ case 102: goto st34;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr50;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr50;
+ } else
+ goto tr50;
+ goto tr59;
+st34:
+ if ( ++p == pe )
+ goto _out34;
+case 34:
+ switch( (*p) ) {
+ case 95: goto tr50;
+ case 105: goto st35;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr50;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr50;
+ } else
+ goto tr50;
+ goto tr59;
+st35:
+ if ( ++p == pe )
+ goto _out35;
+case 35:
+ switch( (*p) ) {
+ case 95: goto tr50;
+ case 110: goto st36;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr50;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr50;
+ } else
+ goto tr50;
+ goto tr59;
+st36:
+ if ( ++p == pe )
+ goto _out36;
+case 36:
+ switch( (*p) ) {
+ case 95: goto tr50;
+ case 101: goto tr64;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr50;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr50;
+ } else
+ goto tr50;
+ goto tr59;
+tr15:
+#line 606 "rlscan.rl"
+ {tokend = p+1;{ token( IL_Literal, tokstart, tokend ); }{p = ((tokend))-1;}}
+ goto st37;
+tr23:
+#line 612 "rlscan.rl"
+ {tokend = p+1;{ token( IL_Comment, tokstart, tokend ); }{p = ((tokend))-1;}}
+ goto st37;
+tr25:
+#line 532 "rlscan.rl"
+ {
+ lastnl = p;
+ column = 0;
+ line++;
+ }
+#line 612 "rlscan.rl"
+ {tokend = p+1;{ token( IL_Comment, tokstart, tokend ); }{p = ((tokend))-1;}}
+ goto st37;
+tr26:
+#line 602 "rlscan.rl"
+ {{ token( TK_UInt, tokstart, tokend ); }{p = ((tokend))-1;}}
+ goto st37;
+tr65:
+#line 659 "rlscan.rl"
+ {tokend = p+1;{ token( IL_Symbol, tokstart, tokend ); }{p = ((tokend))-1;}}
+ goto st37;
+tr66:
+#line 654 "rlscan.rl"
+ {tokend = p+1;{
+ scan_error() << "unterminated code block" << endl;
+ }{p = ((tokend))-1;}}
+ goto st37;
+tr71:
+#line 634 "rlscan.rl"
+ {tokend = p+1;{ token( *tokstart, tokstart, tokend ); }{p = ((tokend))-1;}}
+ goto st37;
+tr72:
+#line 629 "rlscan.rl"
+ {tokend = p+1;{
+ whitespaceOn = true;
+ token( *tokstart, tokstart, tokend );
+ }{p = ((tokend))-1;}}
+ goto st37;
+tr77:
+#line 622 "rlscan.rl"
+ {tokend = p+1;{
+ whitespaceOn = true;
+ token( *tokstart, tokstart, tokend );
+ if ( inlineBlockType == SemiTerminated )
+ {{p = ((tokend))-1;}{goto st88;}}
+ }{p = ((tokend))-1;}}
+ goto st37;
+tr80:
+#line 636 "rlscan.rl"
+ {tokend = p+1;{
+ token( IL_Symbol, tokstart, tokend );
+ curly_count += 1;
+ }{p = ((tokend))-1;}}
+ goto st37;
+tr81:
+#line 641 "rlscan.rl"
+ {tokend = p+1;{
+ if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) {
+ /* Inline code block ends. */
+ token( '}' );
+ {{p = ((tokend))-1;}{goto st88;}}
+ }
+ else {
+ /* Either a semi terminated inline block or only the closing
+ * brace of some inner scope, not the block's closing brace. */
+ token( IL_Symbol, tokstart, tokend );
+ }
+ }{p = ((tokend))-1;}}
+ goto st37;
+tr82:
+#line 608 "rlscan.rl"
+ {tokend = p;{
+ if ( whitespaceOn )
+ token( IL_WhiteSpace, tokstart, tokend );
+ }{p = ((tokend))-1;}}
+ goto st37;
+tr83:
+#line 659 "rlscan.rl"
+ {tokend = p;{ token( IL_Symbol, tokstart, tokend ); }{p = ((tokend))-1;}}
+ goto st37;
+tr84:
+#line 602 "rlscan.rl"
+ {tokend = p;{ token( TK_UInt, tokstart, tokend ); }{p = ((tokend))-1;}}
+ goto st37;
+tr86:
+#line 603 "rlscan.rl"
+ {tokend = p;{ token( TK_Hex, tokstart, tokend ); }{p = ((tokend))-1;}}
+ goto st37;
+tr87:
+#line 614 "rlscan.rl"
+ {tokend = p+1;{ token( TK_NameSep, tokstart, tokend ); }{p = ((tokend))-1;}}
+ goto st37;
+tr88:
+#line 1 "rlscan.rl"
+ { switch( act ) {
+ case 1:
+ { token( KW_PChar ); }
+ break;
+ case 3:
+ { token( KW_CurState ); }
+ break;
+ case 4:
+ { token( KW_TargState ); }
+ break;
+ case 5:
+ {
+ whitespaceOn = false;
+ token( KW_Entry );
+ }
+ break;
+ case 6:
+ {
+ whitespaceOn = false;
+ token( KW_Hold );
+ }
+ break;
+ case 7:
+ { token( KW_Exec, 0, 0 ); }
+ break;
+ case 8:
+ {
+ whitespaceOn = false;
+ token( KW_Goto );
+ }
+ break;
+ case 9:
+ {
+ whitespaceOn = false;
+ token( KW_Next );
+ }
+ break;
+ case 10:
+ {
+ whitespaceOn = false;
+ token( KW_Call );
+ }
+ break;
+ case 11:
+ {
+ whitespaceOn = false;
+ token( KW_Ret );
+ }
+ break;
+ case 12:
+ {
+ whitespaceOn = false;
+ token( KW_Break );
+ }
+ break;
+ case 13:
+ { token( TK_Word, tokstart, tokend ); }
+ break;
+ default: break;
+ }
+ {p = ((tokend))-1;}}
+ goto st37;
+tr89:
+#line 600 "rlscan.rl"
+ {tokend = p;{ token( TK_Word, tokstart, tokend ); }{p = ((tokend))-1;}}
+ goto st37;
+tr103:
+#line 565 "rlscan.rl"
+ {tokend = p;{ token( KW_Char ); }{p = ((tokend))-1;}}
+ goto st37;
+st37:
+#line 1 "rlscan.rl"
+ {tokstart = 0;}
+ if ( ++p == pe )
+ goto _out37;
+case 37:
+#line 1 "rlscan.rl"
+ {tokstart = p;}
+#line 1588 "rlscan.cpp"
+ switch( (*p) ) {
+ case 0: goto tr66;
+ case 9: goto st38;
+ case 10: goto tr68;
+ case 32: goto st38;
+ case 34: goto tr69;
+ case 39: goto tr70;
+ case 40: goto tr71;
+ case 44: goto tr71;
+ case 47: goto tr73;
+ case 48: goto tr74;
+ case 58: goto st45;
+ case 59: goto tr77;
+ case 95: goto tr78;
+ case 102: goto st47;
+ case 123: goto tr80;
+ case 125: goto tr81;
+ }
+ if ( (*p) < 49 ) {
+ if ( 41 <= (*p) && (*p) <= 42 )
+ goto tr72;
+ } else if ( (*p) > 57 ) {
+ if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else if ( (*p) >= 65 )
+ goto tr78;
+ } else
+ goto st43;
+ goto tr65;
+tr68:
+#line 532 "rlscan.rl"
+ {
+ lastnl = p;
+ column = 0;
+ line++;
+ }
+ goto st38;
+st38:
+ if ( ++p == pe )
+ goto _out38;
+case 38:
+#line 1631 "rlscan.cpp"
+ switch( (*p) ) {
+ case 9: goto st38;
+ case 10: goto tr68;
+ case 32: goto st38;
+ }
+ goto tr82;
+tr69:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+ goto st39;
+st39:
+ if ( ++p == pe )
+ goto _out39;
+case 39:
+#line 1646 "rlscan.cpp"
+ switch( (*p) ) {
+ case 10: goto tr14;
+ case 34: goto tr15;
+ case 92: goto st9;
+ }
+ goto st8;
+tr14:
+#line 532 "rlscan.rl"
+ {
+ lastnl = p;
+ column = 0;
+ line++;
+ }
+ goto st8;
+st8:
+ if ( ++p == pe )
+ goto _out8;
+case 8:
+#line 1665 "rlscan.cpp"
+ switch( (*p) ) {
+ case 10: goto tr14;
+ case 34: goto tr15;
+ case 92: goto st9;
+ }
+ goto st8;
+st9:
+ if ( ++p == pe )
+ goto _out9;
+case 9:
+ if ( (*p) == 10 )
+ goto tr14;
+ goto st8;
+tr70:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+ goto st40;
+st40:
+ if ( ++p == pe )
+ goto _out40;
+case 40:
+#line 1687 "rlscan.cpp"
+ switch( (*p) ) {
+ case 10: goto tr18;
+ case 39: goto tr15;
+ case 92: goto st11;
+ }
+ goto st10;
+tr18:
+#line 532 "rlscan.rl"
+ {
+ lastnl = p;
+ column = 0;
+ line++;
+ }
+ goto st10;
+st10:
+ if ( ++p == pe )
+ goto _out10;
+case 10:
+#line 1706 "rlscan.cpp"
+ switch( (*p) ) {
+ case 10: goto tr18;
+ case 39: goto tr15;
+ case 92: goto st11;
+ }
+ goto st10;
+st11:
+ if ( ++p == pe )
+ goto _out11;
+case 11:
+ if ( (*p) == 10 )
+ goto tr18;
+ goto st10;
+tr73:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+ goto st41;
+st41:
+ if ( ++p == pe )
+ goto _out41;
+case 41:
+#line 1728 "rlscan.cpp"
+ switch( (*p) ) {
+ case 42: goto st12;
+ case 47: goto st14;
+ }
+ goto tr83;
+tr21:
+#line 532 "rlscan.rl"
+ {
+ lastnl = p;
+ column = 0;
+ line++;
+ }
+ goto st12;
+st12:
+ if ( ++p == pe )
+ goto _out12;
+case 12:
+#line 1746 "rlscan.cpp"
+ switch( (*p) ) {
+ case 10: goto tr21;
+ case 42: goto st13;
+ }
+ goto st12;
+st13:
+ if ( ++p == pe )
+ goto _out13;
+case 13:
+ switch( (*p) ) {
+ case 10: goto tr21;
+ case 42: goto st13;
+ case 47: goto tr23;
+ }
+ goto st12;
+st14:
+ if ( ++p == pe )
+ goto _out14;
+case 14:
+ if ( (*p) == 10 )
+ goto tr25;
+ goto st14;
+tr74:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+ goto st42;
+st42:
+ if ( ++p == pe )
+ goto _out42;
+case 42:
+#line 1777 "rlscan.cpp"
+ if ( (*p) == 120 )
+ goto st15;
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st43;
+ goto tr84;
+st43:
+ if ( ++p == pe )
+ goto _out43;
+case 43:
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st43;
+ goto tr84;
+st15:
+ if ( ++p == pe )
+ goto _out15;
+case 15:
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st44;
+ } else if ( (*p) > 70 ) {
+ if ( 97 <= (*p) && (*p) <= 102 )
+ goto st44;
+ } else
+ goto st44;
+ goto tr26;
+st44:
+ if ( ++p == pe )
+ goto _out44;
+case 44:
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st44;
+ } else if ( (*p) > 70 ) {
+ if ( 97 <= (*p) && (*p) <= 102 )
+ goto st44;
+ } else
+ goto st44;
+ goto tr86;
+st45:
+ if ( ++p == pe )
+ goto _out45;
+case 45:
+ if ( (*p) == 58 )
+ goto tr87;
+ goto tr83;
+tr78:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 600 "rlscan.rl"
+ {act = 13;}
+ goto st46;
+tr102:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 595 "rlscan.rl"
+ {act = 12;}
+ goto st46;
+tr107:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 587 "rlscan.rl"
+ {act = 10;}
+ goto st46;
+tr109:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 566 "rlscan.rl"
+ {act = 3;}
+ goto st46;
+tr114:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 568 "rlscan.rl"
+ {act = 5;}
+ goto st46;
+tr116:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 578 "rlscan.rl"
+ {act = 7;}
+ goto st46;
+tr119:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 579 "rlscan.rl"
+ {act = 8;}
+ goto st46;
+tr122:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 574 "rlscan.rl"
+ {act = 6;}
+ goto st46;
+tr125:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 583 "rlscan.rl"
+ {act = 9;}
+ goto st46;
+tr126:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 564 "rlscan.rl"
+ {act = 1;}
+ goto st46;
+tr128:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 591 "rlscan.rl"
+ {act = 11;}
+ goto st46;
+tr132:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 567 "rlscan.rl"
+ {act = 4;}
+ goto st46;
+st46:
+ if ( ++p == pe )
+ goto _out46;
+case 46:
+#line 1899 "rlscan.cpp"
+ if ( (*p) == 95 )
+ goto tr78;
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr88;
+st47:
+ if ( ++p == pe )
+ goto _out47;
+case 47:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 98: goto st48;
+ case 99: goto st52;
+ case 101: goto st57;
+ case 103: goto st63;
+ case 104: goto st66;
+ case 110: goto st69;
+ case 112: goto st72;
+ case 114: goto st73;
+ case 116: goto st75;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st48:
+ if ( ++p == pe )
+ goto _out48;
+case 48:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 114: goto st49;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st49:
+ if ( ++p == pe )
+ goto _out49;
+case 49:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 101: goto st50;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st50:
+ if ( ++p == pe )
+ goto _out50;
+case 50:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 97: goto st51;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 98 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st51:
+ if ( ++p == pe )
+ goto _out51;
+case 51:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 107: goto tr102;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st52:
+ if ( ++p == pe )
+ goto _out52;
+case 52:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 97: goto st53;
+ case 117: goto st55;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 98 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr103;
+st53:
+ if ( ++p == pe )
+ goto _out53;
+case 53:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 108: goto st54;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st54:
+ if ( ++p == pe )
+ goto _out54;
+case 54:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 108: goto tr107;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st55:
+ if ( ++p == pe )
+ goto _out55;
+case 55:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 114: goto st56;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st56:
+ if ( ++p == pe )
+ goto _out56;
+case 56:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 115: goto tr109;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st57:
+ if ( ++p == pe )
+ goto _out57;
+case 57:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 110: goto st58;
+ case 120: goto st61;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st58:
+ if ( ++p == pe )
+ goto _out58;
+case 58:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 116: goto st59;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st59:
+ if ( ++p == pe )
+ goto _out59;
+case 59:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 114: goto st60;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st60:
+ if ( ++p == pe )
+ goto _out60;
+case 60:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 121: goto tr114;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st61:
+ if ( ++p == pe )
+ goto _out61;
+case 61:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 101: goto st62;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st62:
+ if ( ++p == pe )
+ goto _out62;
+case 62:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 99: goto tr116;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st63:
+ if ( ++p == pe )
+ goto _out63;
+case 63:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 111: goto st64;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st64:
+ if ( ++p == pe )
+ goto _out64;
+case 64:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 116: goto st65;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st65:
+ if ( ++p == pe )
+ goto _out65;
+case 65:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 111: goto tr119;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st66:
+ if ( ++p == pe )
+ goto _out66;
+case 66:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 111: goto st67;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st67:
+ if ( ++p == pe )
+ goto _out67;
+case 67:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 108: goto st68;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st68:
+ if ( ++p == pe )
+ goto _out68;
+case 68:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 100: goto tr122;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st69:
+ if ( ++p == pe )
+ goto _out69;
+case 69:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 101: goto st70;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st70:
+ if ( ++p == pe )
+ goto _out70;
+case 70:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 120: goto st71;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st71:
+ if ( ++p == pe )
+ goto _out71;
+case 71:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 116: goto tr125;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st72:
+ if ( ++p == pe )
+ goto _out72;
+case 72:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 99: goto tr126;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st73:
+ if ( ++p == pe )
+ goto _out73;
+case 73:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 101: goto st74;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st74:
+ if ( ++p == pe )
+ goto _out74;
+case 74:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 116: goto tr128;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st75:
+ if ( ++p == pe )
+ goto _out75;
+case 75:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 97: goto st76;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 98 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st76:
+ if ( ++p == pe )
+ goto _out76;
+case 76:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 114: goto st77;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st77:
+ if ( ++p == pe )
+ goto _out77;
+case 77:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 103: goto st78;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+st78:
+ if ( ++p == pe )
+ goto _out78;
+case 78:
+ switch( (*p) ) {
+ case 95: goto tr78;
+ case 115: goto tr132;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr78;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr78;
+ } else
+ goto tr78;
+ goto tr89;
+tr133:
+#line 686 "rlscan.rl"
+ {tokend = p+1;{ token( RE_Char, tokstart, tokend ); }{p = ((tokend))-1;}}
+ goto st79;
+tr134:
+#line 681 "rlscan.rl"
+ {tokend = p+1;{
+ scan_error() << "unterminated OR literal" << endl;
+ }{p = ((tokend))-1;}}
+ goto st79;
+tr135:
+#line 676 "rlscan.rl"
+ {tokend = p+1;{ token( RE_Dash, 0, 0 ); }{p = ((tokend))-1;}}
+ goto st79;
+tr137:
+#line 679 "rlscan.rl"
+ {tokend = p+1;{ token( RE_SqClose ); {{p = ((tokend))-1;}{cs = stack[--top]; goto _again;}} }{p = ((tokend))-1;}}
+ goto st79;
+tr138:
+#line 673 "rlscan.rl"
+ {tokend = p+1;{ token( RE_Char, tokstart+1, tokend ); }{p = ((tokend))-1;}}
+ goto st79;
+tr139:
+#line 672 "rlscan.rl"
+ {tokend = p+1;{ updateCol(); }{p = ((tokend))-1;}}
+ goto st79;
+tr140:
+#line 664 "rlscan.rl"
+ {tokend = p+1;{ token( RE_Char, '\0' ); }{p = ((tokend))-1;}}
+ goto st79;
+tr141:
+#line 665 "rlscan.rl"
+ {tokend = p+1;{ token( RE_Char, '\a' ); }{p = ((tokend))-1;}}
+ goto st79;
+tr142:
+#line 666 "rlscan.rl"
+ {tokend = p+1;{ token( RE_Char, '\b' ); }{p = ((tokend))-1;}}
+ goto st79;
+tr143:
+#line 670 "rlscan.rl"
+ {tokend = p+1;{ token( RE_Char, '\f' ); }{p = ((tokend))-1;}}
+ goto st79;
+tr144:
+#line 668 "rlscan.rl"
+ {tokend = p+1;{ token( RE_Char, '\n' ); }{p = ((tokend))-1;}}
+ goto st79;
+tr145:
+#line 671 "rlscan.rl"
+ {tokend = p+1;{ token( RE_Char, '\r' ); }{p = ((tokend))-1;}}
+ goto st79;
+tr146:
+#line 667 "rlscan.rl"
+ {tokend = p+1;{ token( RE_Char, '\t' ); }{p = ((tokend))-1;}}
+ goto st79;
+tr147:
+#line 669 "rlscan.rl"
+ {tokend = p+1;{ token( RE_Char, '\v' ); }{p = ((tokend))-1;}}
+ goto st79;
+st79:
+#line 1 "rlscan.rl"
+ {tokstart = 0;}
+ if ( ++p == pe )
+ goto _out79;
+case 79:
+#line 1 "rlscan.rl"
+ {tokstart = p;}
+#line 2531 "rlscan.cpp"
+ switch( (*p) ) {
+ case 0: goto tr134;
+ case 45: goto tr135;
+ case 92: goto st80;
+ case 93: goto tr137;
+ }
+ goto tr133;
+st80:
+ if ( ++p == pe )
+ goto _out80;
+case 80:
+ switch( (*p) ) {
+ case 10: goto tr139;
+ case 48: goto tr140;
+ case 97: goto tr141;
+ case 98: goto tr142;
+ case 102: goto tr143;
+ case 110: goto tr144;
+ case 114: goto tr145;
+ case 116: goto tr146;
+ case 118: goto tr147;
+ }
+ goto tr138;
+tr148:
+#line 721 "rlscan.rl"
+ {tokend = p+1;{ token( RE_Char, tokstart, tokend ); }{p = ((tokend))-1;}}
+ goto st81;
+tr149:
+#line 716 "rlscan.rl"
+ {tokend = p+1;{
+ scan_error() << "unterminated regular expression" << endl;
+ }{p = ((tokend))-1;}}
+ goto st81;
+tr150:
+#line 711 "rlscan.rl"
+ {tokend = p+1;{ token( RE_Star ); }{p = ((tokend))-1;}}
+ goto st81;
+tr151:
+#line 710 "rlscan.rl"
+ {tokend = p+1;{ token( RE_Dot ); }{p = ((tokend))-1;}}
+ goto st81;
+tr155:
+#line 704 "rlscan.rl"
+ {tokend = p;{
+ token( RE_Slash, tokstart, tokend );
+ {{p = ((tokend))-1;}{goto st88;}}
+ }{p = ((tokend))-1;}}
+ goto st81;
+tr156:
+#line 704 "rlscan.rl"
+ {tokend = p+1;{
+ token( RE_Slash, tokstart, tokend );
+ {{p = ((tokend))-1;}{goto st88;}}
+ }{p = ((tokend))-1;}}
+ goto st81;
+tr157:
+#line 713 "rlscan.rl"
+ {tokend = p;{ token( RE_SqOpen ); {{p = ((tokend))-1;}{stack[top++] = 81; goto st79;}} }{p = ((tokend))-1;}}
+ goto st81;
+tr158:
+#line 714 "rlscan.rl"
+ {tokend = p+1;{ token( RE_SqOpenNeg ); {{p = ((tokend))-1;}{stack[top++] = 81; goto st79;}} }{p = ((tokend))-1;}}
+ goto st81;
+tr159:
+#line 701 "rlscan.rl"
+ {tokend = p+1;{ token( RE_Char, tokstart+1, tokend ); }{p = ((tokend))-1;}}
+ goto st81;
+tr160:
+#line 700 "rlscan.rl"
+ {tokend = p+1;{ updateCol(); }{p = ((tokend))-1;}}
+ goto st81;
+tr161:
+#line 692 "rlscan.rl"
+ {tokend = p+1;{ token( RE_Char, '\0' ); }{p = ((tokend))-1;}}
+ goto st81;
+tr162:
+#line 693 "rlscan.rl"
+ {tokend = p+1;{ token( RE_Char, '\a' ); }{p = ((tokend))-1;}}
+ goto st81;
+tr163:
+#line 694 "rlscan.rl"
+ {tokend = p+1;{ token( RE_Char, '\b' ); }{p = ((tokend))-1;}}
+ goto st81;
+tr164:
+#line 698 "rlscan.rl"
+ {tokend = p+1;{ token( RE_Char, '\f' ); }{p = ((tokend))-1;}}
+ goto st81;
+tr165:
+#line 696 "rlscan.rl"
+ {tokend = p+1;{ token( RE_Char, '\n' ); }{p = ((tokend))-1;}}
+ goto st81;
+tr166:
+#line 699 "rlscan.rl"
+ {tokend = p+1;{ token( RE_Char, '\r' ); }{p = ((tokend))-1;}}
+ goto st81;
+tr167:
+#line 695 "rlscan.rl"
+ {tokend = p+1;{ token( RE_Char, '\t' ); }{p = ((tokend))-1;}}
+ goto st81;
+tr168:
+#line 697 "rlscan.rl"
+ {tokend = p+1;{ token( RE_Char, '\v' ); }{p = ((tokend))-1;}}
+ goto st81;
+st81:
+#line 1 "rlscan.rl"
+ {tokstart = 0;}
+ if ( ++p == pe )
+ goto _out81;
+case 81:
+#line 1 "rlscan.rl"
+ {tokstart = p;}
+#line 2643 "rlscan.cpp"
+ switch( (*p) ) {
+ case 0: goto tr149;
+ case 42: goto tr150;
+ case 46: goto tr151;
+ case 47: goto st82;
+ case 91: goto st83;
+ case 92: goto st84;
+ }
+ goto tr148;
+st82:
+ if ( ++p == pe )
+ goto _out82;
+case 82:
+ if ( (*p) == 105 )
+ goto tr156;
+ goto tr155;
+st83:
+ if ( ++p == pe )
+ goto _out83;
+case 83:
+ if ( (*p) == 94 )
+ goto tr158;
+ goto tr157;
+st84:
+ if ( ++p == pe )
+ goto _out84;
+case 84:
+ switch( (*p) ) {
+ case 10: goto tr160;
+ case 48: goto tr161;
+ case 97: goto tr162;
+ case 98: goto tr163;
+ case 102: goto tr164;
+ case 110: goto tr165;
+ case 114: goto tr166;
+ case 116: goto tr167;
+ case 118: goto tr168;
+ }
+ goto tr159;
+tr169:
+#line 730 "rlscan.rl"
+ {tokend = p+1;{
+ scan_error() << "unterminated write statement" << endl;
+ }{p = ((tokend))-1;}}
+ goto st85;
+tr172:
+#line 728 "rlscan.rl"
+ {tokend = p+1;{ token( ';' ); {{p = ((tokend))-1;}{goto st88;}} }{p = ((tokend))-1;}}
+ goto st85;
+tr174:
+#line 727 "rlscan.rl"
+ {tokend = p;{ updateCol(); }{p = ((tokend))-1;}}
+ goto st85;
+tr175:
+#line 726 "rlscan.rl"
+ {tokend = p;{ token( TK_Word, tokstart, tokend ); }{p = ((tokend))-1;}}
+ goto st85;
+st85:
+#line 1 "rlscan.rl"
+ {tokstart = 0;}
+ if ( ++p == pe )
+ goto _out85;
+case 85:
+#line 1 "rlscan.rl"
+ {tokstart = p;}
+#line 2709 "rlscan.cpp"
+ switch( (*p) ) {
+ case 0: goto tr169;
+ case 32: goto st86;
+ case 59: goto tr172;
+ case 95: goto st87;
+ }
+ if ( (*p) < 65 ) {
+ if ( 9 <= (*p) && (*p) <= 10 )
+ goto st86;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto st87;
+ } else
+ goto st87;
+ goto st0;
+st0:
+ goto _out0;
+st86:
+ if ( ++p == pe )
+ goto _out86;
+case 86:
+ if ( (*p) == 32 )
+ goto st86;
+ if ( 9 <= (*p) && (*p) <= 10 )
+ goto st86;
+ goto tr174;
+st87:
+ if ( ++p == pe )
+ goto _out87;
+case 87:
+ if ( (*p) == 95 )
+ goto st87;
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st87;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto st87;
+ } else
+ goto st87;
+ goto tr175;
+tr33:
+#line 532 "rlscan.rl"
+ {
+ lastnl = p;
+ column = 0;
+ line++;
+ }
+#line 790 "rlscan.rl"
+ {tokend = p+1;{ updateCol(); }{p = ((tokend))-1;}}
+ goto st88;
+tr37:
+#line 777 "rlscan.rl"
+ {{ token( TK_UInt, tokstart, tokend ); }{p = ((tokend))-1;}}
+ goto st88;
+tr39:
+#line 890 "rlscan.rl"
+ {{ token( *tokstart ); }{p = ((tokend))-1;}}
+ goto st88;
+tr40:
+#line 858 "rlscan.rl"
+ {tokend = p+1;{
+ updateCol();
+ endSection();
+ {{p = ((tokend))-1;}{goto st23;}}
+ }{p = ((tokend))-1;}}
+ goto st88;
+tr176:
+#line 890 "rlscan.rl"
+ {tokend = p+1;{ token( *tokstart ); }{p = ((tokend))-1;}}
+ goto st88;
+tr177:
+#line 886 "rlscan.rl"
+ {tokend = p+1;{
+ scan_error() << "unterminated ragel section" << endl;
+ }{p = ((tokend))-1;}}
+ goto st88;
+tr179:
+#line 532 "rlscan.rl"
+ {
+ lastnl = p;
+ column = 0;
+ line++;
+ }
+#line 867 "rlscan.rl"
+ {tokend = p+1;{
+ updateCol();
+ if ( singleLineSpec ) {
+ endSection();
+ {{p = ((tokend))-1;}{goto st23;}}
+ }
+ }{p = ((tokend))-1;}}
+ goto st88;
+tr188:
+#line 787 "rlscan.rl"
+ {tokend = p+1;{ token( RE_Slash ); {{p = ((tokend))-1;}{goto st81;}} }{p = ((tokend))-1;}}
+ goto st88;
+tr208:
+#line 875 "rlscan.rl"
+ {tokend = p+1;{
+ if ( lastToken == KW_Export || lastToken == KW_Entry )
+ token( '{' );
+ else {
+ token( '{' );
+ curly_count = 1;
+ inlineBlockType = CurlyDelimited;
+ {{p = ((tokend))-1;}{goto st37;}}
+ }
+ }{p = ((tokend))-1;}}
+ goto st88;
+tr211:
+#line 864 "rlscan.rl"
+ {tokend = p;{ updateCol(); }{p = ((tokend))-1;}}
+ goto st88;
+tr212:
+#line 782 "rlscan.rl"
+ {tokend = p;{ token( TK_Literal, tokstart, tokend ); }{p = ((tokend))-1;}}
+ goto st88;
+tr213:
+#line 782 "rlscan.rl"
+ {tokend = p+1;{ token( TK_Literal, tokstart, tokend ); }{p = ((tokend))-1;}}
+ goto st88;
+tr214:
+#line 890 "rlscan.rl"
+ {tokend = p;{ token( *tokstart ); }{p = ((tokend))-1;}}
+ goto st88;
+tr215:
+#line 820 "rlscan.rl"
+ {tokend = p+1;{ token( TK_AllGblError ); }{p = ((tokend))-1;}}
+ goto st88;
+tr216:
+#line 804 "rlscan.rl"
+ {tokend = p+1;{ token( TK_AllFromState ); }{p = ((tokend))-1;}}
+ goto st88;
+tr217:
+#line 812 "rlscan.rl"
+ {tokend = p+1;{ token( TK_AllEOF ); }{p = ((tokend))-1;}}
+ goto st88;
+tr218:
+#line 839 "rlscan.rl"
+ {tokend = p+1;{ token( TK_AllCond ); }{p = ((tokend))-1;}}
+ goto st88;
+tr219:
+#line 828 "rlscan.rl"
+ {tokend = p+1;{ token( TK_AllLocalError ); }{p = ((tokend))-1;}}
+ goto st88;
+tr220:
+#line 796 "rlscan.rl"
+ {tokend = p+1;{ token( TK_AllToState ); }{p = ((tokend))-1;}}
+ goto st88;
+tr221:
+#line 821 "rlscan.rl"
+ {tokend = p+1;{ token( TK_FinalGblError ); }{p = ((tokend))-1;}}
+ goto st88;
+tr222:
+#line 805 "rlscan.rl"
+ {tokend = p+1;{ token( TK_FinalFromState ); }{p = ((tokend))-1;}}
+ goto st88;
+tr223:
+#line 813 "rlscan.rl"
+ {tokend = p+1;{ token( TK_FinalEOF ); }{p = ((tokend))-1;}}
+ goto st88;
+tr224:
+#line 840 "rlscan.rl"
+ {tokend = p+1;{ token( TK_LeavingCond ); }{p = ((tokend))-1;}}
+ goto st88;
+tr225:
+#line 829 "rlscan.rl"
+ {tokend = p+1;{ token( TK_FinalLocalError ); }{p = ((tokend))-1;}}
+ goto st88;
+tr226:
+#line 797 "rlscan.rl"
+ {tokend = p+1;{ token( TK_FinalToState ); }{p = ((tokend))-1;}}
+ goto st88;
+tr227:
+#line 843 "rlscan.rl"
+ {tokend = p+1;{ token( TK_StarStar ); }{p = ((tokend))-1;}}
+ goto st88;
+tr228:
+#line 844 "rlscan.rl"
+ {tokend = p+1;{ token( TK_DashDash ); }{p = ((tokend))-1;}}
+ goto st88;
+tr229:
+#line 845 "rlscan.rl"
+ {tokend = p+1;{ token( TK_Arrow ); }{p = ((tokend))-1;}}
+ goto st88;
+tr230:
+#line 842 "rlscan.rl"
+ {tokend = p+1;{ token( TK_DotDot ); }{p = ((tokend))-1;}}
+ goto st88;
+tr231:
+#line 777 "rlscan.rl"
+ {tokend = p;{ token( TK_UInt, tokstart, tokend ); }{p = ((tokend))-1;}}
+ goto st88;
+tr233:
+#line 778 "rlscan.rl"
+ {tokend = p;{ token( TK_Hex, tokstart, tokend ); }{p = ((tokend))-1;}}
+ goto st88;
+tr234:
+#line 856 "rlscan.rl"
+ {tokend = p+1;{ token( TK_NameSep, tokstart, tokend ); }{p = ((tokend))-1;}}
+ goto st88;
+tr235:
+#line 792 "rlscan.rl"
+ {tokend = p+1;{ token( TK_ColonEquals ); }{p = ((tokend))-1;}}
+ goto st88;
+tr237:
+#line 848 "rlscan.rl"
+ {tokend = p;{ token( TK_ColonGt ); }{p = ((tokend))-1;}}
+ goto st88;
+tr238:
+#line 849 "rlscan.rl"
+ {tokend = p+1;{ token( TK_ColonGtGt ); }{p = ((tokend))-1;}}
+ goto st88;
+tr239:
+#line 822 "rlscan.rl"
+ {tokend = p+1;{ token( TK_NotStartGblError ); }{p = ((tokend))-1;}}
+ goto st88;
+tr240:
+#line 806 "rlscan.rl"
+ {tokend = p+1;{ token( TK_NotStartFromState ); }{p = ((tokend))-1;}}
+ goto st88;
+tr241:
+#line 814 "rlscan.rl"
+ {tokend = p+1;{ token( TK_NotStartEOF ); }{p = ((tokend))-1;}}
+ goto st88;
+tr242:
+#line 850 "rlscan.rl"
+ {tokend = p+1;{ token( TK_LtColon ); }{p = ((tokend))-1;}}
+ goto st88;
+tr244:
+#line 830 "rlscan.rl"
+ {tokend = p+1;{ token( TK_NotStartLocalError ); }{p = ((tokend))-1;}}
+ goto st88;
+tr245:
+#line 798 "rlscan.rl"
+ {tokend = p+1;{ token( TK_NotStartToState ); }{p = ((tokend))-1;}}
+ goto st88;
+tr246:
+#line 835 "rlscan.rl"
+ {tokend = p;{ token( TK_Middle ); }{p = ((tokend))-1;}}
+ goto st88;
+tr247:
+#line 824 "rlscan.rl"
+ {tokend = p+1;{ token( TK_MiddleGblError ); }{p = ((tokend))-1;}}
+ goto st88;
+tr248:
+#line 808 "rlscan.rl"
+ {tokend = p+1;{ token( TK_MiddleFromState ); }{p = ((tokend))-1;}}
+ goto st88;
+tr249:
+#line 816 "rlscan.rl"
+ {tokend = p+1;{ token( TK_MiddleEOF ); }{p = ((tokend))-1;}}
+ goto st88;
+tr250:
+#line 832 "rlscan.rl"
+ {tokend = p+1;{ token( TK_MiddleLocalError ); }{p = ((tokend))-1;}}
+ goto st88;
+tr251:
+#line 800 "rlscan.rl"
+ {tokend = p+1;{ token( TK_MiddleToState ); }{p = ((tokend))-1;}}
+ goto st88;
+tr252:
+#line 846 "rlscan.rl"
+ {tokend = p+1;{ token( TK_DoubleArrow ); }{p = ((tokend))-1;}}
+ goto st88;
+tr253:
+#line 819 "rlscan.rl"
+ {tokend = p+1;{ token( TK_StartGblError ); }{p = ((tokend))-1;}}
+ goto st88;
+tr254:
+#line 803 "rlscan.rl"
+ {tokend = p+1;{ token( TK_StartFromState ); }{p = ((tokend))-1;}}
+ goto st88;
+tr255:
+#line 811 "rlscan.rl"
+ {tokend = p+1;{ token( TK_StartEOF ); }{p = ((tokend))-1;}}
+ goto st88;
+tr256:
+#line 838 "rlscan.rl"
+ {tokend = p+1;{ token( TK_StartCond ); }{p = ((tokend))-1;}}
+ goto st88;
+tr257:
+#line 827 "rlscan.rl"
+ {tokend = p+1;{ token( TK_StartLocalError ); }{p = ((tokend))-1;}}
+ goto st88;
+tr258:
+#line 795 "rlscan.rl"
+ {tokend = p+1;{ token( TK_StartToState ); }{p = ((tokend))-1;}}
+ goto st88;
+tr259:
+#line 823 "rlscan.rl"
+ {tokend = p+1;{ token( TK_NotFinalGblError ); }{p = ((tokend))-1;}}
+ goto st88;
+tr260:
+#line 807 "rlscan.rl"
+ {tokend = p+1;{ token( TK_NotFinalFromState ); }{p = ((tokend))-1;}}
+ goto st88;
+tr261:
+#line 815 "rlscan.rl"
+ {tokend = p+1;{ token( TK_NotFinalEOF ); }{p = ((tokend))-1;}}
+ goto st88;
+tr262:
+#line 831 "rlscan.rl"
+ {tokend = p+1;{ token( TK_NotFinalLocalError ); }{p = ((tokend))-1;}}
+ goto st88;
+tr263:
+#line 799 "rlscan.rl"
+ {tokend = p+1;{ token( TK_NotFinalToState ); }{p = ((tokend))-1;}}
+ goto st88;
+tr264:
+#line 1 "rlscan.rl"
+ { switch( act ) {
+ case 62:
+ { token( KW_Machine ); }
+ break;
+ case 63:
+ { token( KW_Include ); }
+ break;
+ case 64:
+ { token( KW_Import ); }
+ break;
+ case 65:
+ {
+ token( KW_Write );
+ {{p = ((tokend))-1;}{goto st85;}}
+ }
+ break;
+ case 66:
+ { token( KW_Action ); }
+ break;
+ case 67:
+ { token( KW_AlphType ); }
+ break;
+ case 68:
+ {
+ token( KW_GetKey );
+ inlineBlockType = SemiTerminated;
+ {{p = ((tokend))-1;}{goto st37;}}
+ }
+ break;
+ case 69:
+ {
+ token( KW_Access );
+ inlineBlockType = SemiTerminated;
+ {{p = ((tokend))-1;}{goto st37;}}
+ }
+ break;
+ case 70:
+ {
+ token( KW_Variable );
+ inlineBlockType = SemiTerminated;
+ {{p = ((tokend))-1;}{goto st37;}}
+ }
+ break;
+ case 71:
+ { token( KW_When ); }
+ break;
+ case 72:
+ { token( KW_Eof ); }
+ break;
+ case 73:
+ { token( KW_Err ); }
+ break;
+ case 74:
+ { token( KW_Lerr ); }
+ break;
+ case 75:
+ { token( KW_To ); }
+ break;
+ case 76:
+ { token( KW_From ); }
+ break;
+ case 77:
+ { token( KW_Export ); }
+ break;
+ case 78:
+ { token( TK_Word, tokstart, tokend ); }
+ break;
+ default: break;
+ }
+ {p = ((tokend))-1;}}
+ goto st88;
+tr265:
+#line 784 "rlscan.rl"
+ {tokend = p;{ token( RE_SqOpen ); {{p = ((tokend))-1;}{stack[top++] = 88; goto st79;}} }{p = ((tokend))-1;}}
+ goto st88;
+tr266:
+#line 785 "rlscan.rl"
+ {tokend = p+1;{ token( RE_SqOpenNeg ); {{p = ((tokend))-1;}{stack[top++] = 88; goto st79;}} }{p = ((tokend))-1;}}
+ goto st88;
+tr267:
+#line 774 "rlscan.rl"
+ {tokend = p;{ token( TK_Word, tokstart, tokend ); }{p = ((tokend))-1;}}
+ goto st88;
+tr336:
+#line 853 "rlscan.rl"
+ {tokend = p+1;{ token( TK_BarStar ); }{p = ((tokend))-1;}}
+ goto st88;
+st88:
+#line 1 "rlscan.rl"
+ {tokstart = 0;}
+ if ( ++p == pe )
+ goto _out88;
+case 88:
+#line 1 "rlscan.rl"
+ {tokstart = p;}
+#line 3117 "rlscan.cpp"
+ switch( (*p) ) {
+ case 0: goto tr177;
+ case 9: goto st89;
+ case 10: goto tr179;
+ case 13: goto st89;
+ case 32: goto st89;
+ case 34: goto tr180;
+ case 35: goto tr181;
+ case 36: goto st93;
+ case 37: goto st94;
+ case 39: goto tr184;
+ case 42: goto st96;
+ case 45: goto st97;
+ case 46: goto st98;
+ case 47: goto tr188;
+ case 48: goto tr189;
+ case 58: goto st102;
+ case 60: goto st104;
+ case 61: goto st106;
+ case 62: goto st107;
+ case 64: goto st108;
+ case 91: goto st110;
+ case 95: goto tr196;
+ case 97: goto st111;
+ case 101: goto st125;
+ case 102: goto st132;
+ case 103: goto st135;
+ case 105: goto st140;
+ case 108: goto st150;
+ case 109: goto st153;
+ case 116: goto st159;
+ case 118: goto st160;
+ case 119: goto st167;
+ case 123: goto tr208;
+ case 124: goto st173;
+ case 125: goto tr210;
+ }
+ if ( (*p) < 65 ) {
+ if ( 49 <= (*p) && (*p) <= 57 )
+ goto st100;
+ } else if ( (*p) > 90 ) {
+ if ( 98 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr176;
+st89:
+ if ( ++p == pe )
+ goto _out89;
+case 89:
+ switch( (*p) ) {
+ case 9: goto st89;
+ case 13: goto st89;
+ case 32: goto st89;
+ }
+ goto tr211;
+tr180:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+ goto st90;
+st90:
+ if ( ++p == pe )
+ goto _out90;
+case 90:
+#line 3182 "rlscan.cpp"
+ switch( (*p) ) {
+ case 10: goto tr29;
+ case 34: goto st91;
+ case 92: goto st17;
+ }
+ goto st16;
+tr29:
+#line 532 "rlscan.rl"
+ {
+ lastnl = p;
+ column = 0;
+ line++;
+ }
+ goto st16;
+st16:
+ if ( ++p == pe )
+ goto _out16;
+case 16:
+#line 3201 "rlscan.cpp"
+ switch( (*p) ) {
+ case 10: goto tr29;
+ case 34: goto st91;
+ case 92: goto st17;
+ }
+ goto st16;
+st91:
+ if ( ++p == pe )
+ goto _out91;
+case 91:
+ if ( (*p) == 105 )
+ goto tr213;
+ goto tr212;
+st17:
+ if ( ++p == pe )
+ goto _out17;
+case 17:
+ if ( (*p) == 10 )
+ goto tr29;
+ goto st16;
+tr181:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+ goto st92;
+st92:
+ if ( ++p == pe )
+ goto _out92;
+case 92:
+#line 3230 "rlscan.cpp"
+ if ( (*p) == 10 )
+ goto tr33;
+ goto st18;
+st18:
+ if ( ++p == pe )
+ goto _out18;
+case 18:
+ if ( (*p) == 10 )
+ goto tr33;
+ goto st18;
+st93:
+ if ( ++p == pe )
+ goto _out93;
+case 93:
+ switch( (*p) ) {
+ case 33: goto tr215;
+ case 42: goto tr216;
+ case 47: goto tr217;
+ case 63: goto tr218;
+ case 94: goto tr219;
+ case 126: goto tr220;
+ }
+ goto tr214;
+st94:
+ if ( ++p == pe )
+ goto _out94;
+case 94:
+ switch( (*p) ) {
+ case 33: goto tr221;
+ case 42: goto tr222;
+ case 47: goto tr223;
+ case 63: goto tr224;
+ case 94: goto tr225;
+ case 126: goto tr226;
+ }
+ goto tr214;
+tr184:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+ goto st95;
+st95:
+ if ( ++p == pe )
+ goto _out95;
+case 95:
+#line 3275 "rlscan.cpp"
+ switch( (*p) ) {
+ case 10: goto tr35;
+ case 39: goto st91;
+ case 92: goto st20;
+ }
+ goto st19;
+tr35:
+#line 532 "rlscan.rl"
+ {
+ lastnl = p;
+ column = 0;
+ line++;
+ }
+ goto st19;
+st19:
+ if ( ++p == pe )
+ goto _out19;
+case 19:
+#line 3294 "rlscan.cpp"
+ switch( (*p) ) {
+ case 10: goto tr35;
+ case 39: goto st91;
+ case 92: goto st20;
+ }
+ goto st19;
+st20:
+ if ( ++p == pe )
+ goto _out20;
+case 20:
+ if ( (*p) == 10 )
+ goto tr35;
+ goto st19;
+st96:
+ if ( ++p == pe )
+ goto _out96;
+case 96:
+ if ( (*p) == 42 )
+ goto tr227;
+ goto tr214;
+st97:
+ if ( ++p == pe )
+ goto _out97;
+case 97:
+ switch( (*p) ) {
+ case 45: goto tr228;
+ case 62: goto tr229;
+ }
+ goto tr214;
+st98:
+ if ( ++p == pe )
+ goto _out98;
+case 98:
+ if ( (*p) == 46 )
+ goto tr230;
+ goto tr214;
+tr189:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+ goto st99;
+st99:
+ if ( ++p == pe )
+ goto _out99;
+case 99:
+#line 3339 "rlscan.cpp"
+ if ( (*p) == 120 )
+ goto st21;
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st100;
+ goto tr231;
+st100:
+ if ( ++p == pe )
+ goto _out100;
+case 100:
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st100;
+ goto tr231;
+st21:
+ if ( ++p == pe )
+ goto _out21;
+case 21:
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st101;
+ } else if ( (*p) > 70 ) {
+ if ( 97 <= (*p) && (*p) <= 102 )
+ goto st101;
+ } else
+ goto st101;
+ goto tr37;
+st101:
+ if ( ++p == pe )
+ goto _out101;
+case 101:
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st101;
+ } else if ( (*p) > 70 ) {
+ if ( 97 <= (*p) && (*p) <= 102 )
+ goto st101;
+ } else
+ goto st101;
+ goto tr233;
+st102:
+ if ( ++p == pe )
+ goto _out102;
+case 102:
+ switch( (*p) ) {
+ case 58: goto tr234;
+ case 61: goto tr235;
+ case 62: goto st103;
+ }
+ goto tr214;
+st103:
+ if ( ++p == pe )
+ goto _out103;
+case 103:
+ if ( (*p) == 62 )
+ goto tr238;
+ goto tr237;
+st104:
+ if ( ++p == pe )
+ goto _out104;
+case 104:
+ switch( (*p) ) {
+ case 33: goto tr239;
+ case 42: goto tr240;
+ case 47: goto tr241;
+ case 58: goto tr242;
+ case 62: goto st105;
+ case 94: goto tr244;
+ case 126: goto tr245;
+ }
+ goto tr214;
+st105:
+ if ( ++p == pe )
+ goto _out105;
+case 105:
+ switch( (*p) ) {
+ case 33: goto tr247;
+ case 42: goto tr248;
+ case 47: goto tr249;
+ case 94: goto tr250;
+ case 126: goto tr251;
+ }
+ goto tr246;
+st106:
+ if ( ++p == pe )
+ goto _out106;
+case 106:
+ if ( (*p) == 62 )
+ goto tr252;
+ goto tr214;
+st107:
+ if ( ++p == pe )
+ goto _out107;
+case 107:
+ switch( (*p) ) {
+ case 33: goto tr253;
+ case 42: goto tr254;
+ case 47: goto tr255;
+ case 63: goto tr256;
+ case 94: goto tr257;
+ case 126: goto tr258;
+ }
+ goto tr214;
+st108:
+ if ( ++p == pe )
+ goto _out108;
+case 108:
+ switch( (*p) ) {
+ case 33: goto tr259;
+ case 42: goto tr260;
+ case 47: goto tr261;
+ case 94: goto tr262;
+ case 126: goto tr263;
+ }
+ goto tr214;
+tr196:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 774 "rlscan.rl"
+ {act = 78;}
+ goto st109;
+tr274:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 755 "rlscan.rl"
+ {act = 69;}
+ goto st109;
+tr277:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 744 "rlscan.rl"
+ {act = 66;}
+ goto st109;
+tr283:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 745 "rlscan.rl"
+ {act = 67;}
+ goto st109;
+tr287:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 766 "rlscan.rl"
+ {act = 72;}
+ goto st109;
+tr288:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 767 "rlscan.rl"
+ {act = 73;}
+ goto st109;
+tr292:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 771 "rlscan.rl"
+ {act = 77;}
+ goto st109;
+tr295:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 770 "rlscan.rl"
+ {act = 76;}
+ goto st109;
+tr300:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 750 "rlscan.rl"
+ {act = 68;}
+ goto st109;
+tr306:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 739 "rlscan.rl"
+ {act = 64;}
+ goto st109;
+tr311:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 738 "rlscan.rl"
+ {act = 63;}
+ goto st109;
+tr314:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 768 "rlscan.rl"
+ {act = 74;}
+ goto st109;
+tr320:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 737 "rlscan.rl"
+ {act = 62;}
+ goto st109;
+tr321:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 769 "rlscan.rl"
+ {act = 75;}
+ goto st109;
+tr328:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 760 "rlscan.rl"
+ {act = 70;}
+ goto st109;
+tr332:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 765 "rlscan.rl"
+ {act = 71;}
+ goto st109;
+tr335:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+#line 740 "rlscan.rl"
+ {act = 65;}
+ goto st109;
+st109:
+ if ( ++p == pe )
+ goto _out109;
+case 109:
+#line 3559 "rlscan.cpp"
+ if ( (*p) == 95 )
+ goto tr196;
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr264;
+st110:
+ if ( ++p == pe )
+ goto _out110;
+case 110:
+ if ( (*p) == 94 )
+ goto tr266;
+ goto tr265;
+st111:
+ if ( ++p == pe )
+ goto _out111;
+case 111:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 99: goto st112;
+ case 108: goto st119;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st112:
+ if ( ++p == pe )
+ goto _out112;
+case 112:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 99: goto st113;
+ case 116: goto st116;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st113:
+ if ( ++p == pe )
+ goto _out113;
+case 113:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 101: goto st114;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st114:
+ if ( ++p == pe )
+ goto _out114;
+case 114:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 115: goto st115;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st115:
+ if ( ++p == pe )
+ goto _out115;
+case 115:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 115: goto tr274;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st116:
+ if ( ++p == pe )
+ goto _out116;
+case 116:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 105: goto st117;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st117:
+ if ( ++p == pe )
+ goto _out117;
+case 117:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 111: goto st118;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st118:
+ if ( ++p == pe )
+ goto _out118;
+case 118:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 110: goto tr277;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st119:
+ if ( ++p == pe )
+ goto _out119;
+case 119:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 112: goto st120;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st120:
+ if ( ++p == pe )
+ goto _out120;
+case 120:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 104: goto st121;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st121:
+ if ( ++p == pe )
+ goto _out121;
+case 121:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 116: goto st122;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st122:
+ if ( ++p == pe )
+ goto _out122;
+case 122:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 121: goto st123;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st123:
+ if ( ++p == pe )
+ goto _out123;
+case 123:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 112: goto st124;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st124:
+ if ( ++p == pe )
+ goto _out124;
+case 124:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 101: goto tr283;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st125:
+ if ( ++p == pe )
+ goto _out125;
+case 125:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 111: goto st126;
+ case 114: goto st127;
+ case 120: goto st128;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st126:
+ if ( ++p == pe )
+ goto _out126;
+case 126:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 102: goto tr287;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st127:
+ if ( ++p == pe )
+ goto _out127;
+case 127:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 114: goto tr288;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st128:
+ if ( ++p == pe )
+ goto _out128;
+case 128:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 112: goto st129;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st129:
+ if ( ++p == pe )
+ goto _out129;
+case 129:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 111: goto st130;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st130:
+ if ( ++p == pe )
+ goto _out130;
+case 130:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 114: goto st131;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st131:
+ if ( ++p == pe )
+ goto _out131;
+case 131:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 116: goto tr292;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st132:
+ if ( ++p == pe )
+ goto _out132;
+case 132:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 114: goto st133;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st133:
+ if ( ++p == pe )
+ goto _out133;
+case 133:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 111: goto st134;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st134:
+ if ( ++p == pe )
+ goto _out134;
+case 134:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 109: goto tr295;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st135:
+ if ( ++p == pe )
+ goto _out135;
+case 135:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 101: goto st136;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st136:
+ if ( ++p == pe )
+ goto _out136;
+case 136:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 116: goto st137;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st137:
+ if ( ++p == pe )
+ goto _out137;
+case 137:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 107: goto st138;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st138:
+ if ( ++p == pe )
+ goto _out138;
+case 138:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 101: goto st139;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st139:
+ if ( ++p == pe )
+ goto _out139;
+case 139:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 121: goto tr300;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st140:
+ if ( ++p == pe )
+ goto _out140;
+case 140:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 109: goto st141;
+ case 110: goto st145;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st141:
+ if ( ++p == pe )
+ goto _out141;
+case 141:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 112: goto st142;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st142:
+ if ( ++p == pe )
+ goto _out142;
+case 142:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 111: goto st143;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st143:
+ if ( ++p == pe )
+ goto _out143;
+case 143:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 114: goto st144;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st144:
+ if ( ++p == pe )
+ goto _out144;
+case 144:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 116: goto tr306;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st145:
+ if ( ++p == pe )
+ goto _out145;
+case 145:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 99: goto st146;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st146:
+ if ( ++p == pe )
+ goto _out146;
+case 146:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 108: goto st147;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st147:
+ if ( ++p == pe )
+ goto _out147;
+case 147:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 117: goto st148;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st148:
+ if ( ++p == pe )
+ goto _out148;
+case 148:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 100: goto st149;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st149:
+ if ( ++p == pe )
+ goto _out149;
+case 149:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 101: goto tr311;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st150:
+ if ( ++p == pe )
+ goto _out150;
+case 150:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 101: goto st151;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st151:
+ if ( ++p == pe )
+ goto _out151;
+case 151:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 114: goto st152;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st152:
+ if ( ++p == pe )
+ goto _out152;
+case 152:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 114: goto tr314;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st153:
+ if ( ++p == pe )
+ goto _out153;
+case 153:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 97: goto st154;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 98 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st154:
+ if ( ++p == pe )
+ goto _out154;
+case 154:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 99: goto st155;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st155:
+ if ( ++p == pe )
+ goto _out155;
+case 155:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 104: goto st156;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st156:
+ if ( ++p == pe )
+ goto _out156;
+case 156:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 105: goto st157;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st157:
+ if ( ++p == pe )
+ goto _out157;
+case 157:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 110: goto st158;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st158:
+ if ( ++p == pe )
+ goto _out158;
+case 158:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 101: goto tr320;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st159:
+ if ( ++p == pe )
+ goto _out159;
+case 159:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 111: goto tr321;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st160:
+ if ( ++p == pe )
+ goto _out160;
+case 160:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 97: goto st161;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 98 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st161:
+ if ( ++p == pe )
+ goto _out161;
+case 161:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 114: goto st162;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st162:
+ if ( ++p == pe )
+ goto _out162;
+case 162:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 105: goto st163;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st163:
+ if ( ++p == pe )
+ goto _out163;
+case 163:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 97: goto st164;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 98 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st164:
+ if ( ++p == pe )
+ goto _out164;
+case 164:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 98: goto st165;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st165:
+ if ( ++p == pe )
+ goto _out165;
+case 165:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 108: goto st166;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st166:
+ if ( ++p == pe )
+ goto _out166;
+case 166:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 101: goto tr328;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st167:
+ if ( ++p == pe )
+ goto _out167;
+case 167:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 104: goto st168;
+ case 114: goto st170;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st168:
+ if ( ++p == pe )
+ goto _out168;
+case 168:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 101: goto st169;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st169:
+ if ( ++p == pe )
+ goto _out169;
+case 169:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 110: goto tr332;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st170:
+ if ( ++p == pe )
+ goto _out170;
+case 170:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 105: goto st171;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st171:
+ if ( ++p == pe )
+ goto _out171;
+case 171:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 116: goto st172;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st172:
+ if ( ++p == pe )
+ goto _out172;
+case 172:
+ switch( (*p) ) {
+ case 95: goto tr196;
+ case 101: goto tr335;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr196;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr196;
+ } else
+ goto tr196;
+ goto tr267;
+st173:
+ if ( ++p == pe )
+ goto _out173;
+case 173:
+ if ( (*p) == 42 )
+ goto tr336;
+ goto tr214;
+tr210:
+#line 1 "rlscan.rl"
+ {tokend = p+1;}
+ goto st174;
+st174:
+ if ( ++p == pe )
+ goto _out174;
+case 174:
+#line 4653 "rlscan.cpp"
+ if ( (*p) == 37 )
+ goto st22;
+ goto tr214;
+st22:
+ if ( ++p == pe )
+ goto _out22;
+case 22:
+ if ( (*p) == 37 )
+ goto tr40;
+ goto tr39;
+ }
+ _out23: cs = 23; goto _out;
+ _out24: cs = 24; goto _out;
+ _out25: cs = 25; goto _out;
+ _out1: cs = 1; goto _out;
+ _out2: cs = 2; goto _out;
+ _out26: cs = 26; goto _out;
+ _out27: cs = 27; goto _out;
+ _out28: cs = 28; goto _out;
+ _out3: cs = 3; goto _out;
+ _out4: cs = 4; goto _out;
+ _out29: cs = 29; goto _out;
+ _out5: cs = 5; goto _out;
+ _out6: cs = 6; goto _out;
+ _out7: cs = 7; goto _out;
+ _out30: cs = 30; goto _out;
+ _out31: cs = 31; goto _out;
+ _out32: cs = 32; goto _out;
+ _out33: cs = 33; goto _out;
+ _out34: cs = 34; goto _out;
+ _out35: cs = 35; goto _out;
+ _out36: cs = 36; goto _out;
+ _out37: cs = 37; goto _out;
+ _out38: cs = 38; goto _out;
+ _out39: cs = 39; goto _out;
+ _out8: cs = 8; goto _out;
+ _out9: cs = 9; goto _out;
+ _out40: cs = 40; goto _out;
+ _out10: cs = 10; goto _out;
+ _out11: cs = 11; goto _out;
+ _out41: cs = 41; goto _out;
+ _out12: cs = 12; goto _out;
+ _out13: cs = 13; goto _out;
+ _out14: cs = 14; goto _out;
+ _out42: cs = 42; goto _out;
+ _out43: cs = 43; goto _out;
+ _out15: cs = 15; goto _out;
+ _out44: cs = 44; goto _out;
+ _out45: cs = 45; goto _out;
+ _out46: cs = 46; goto _out;
+ _out47: cs = 47; goto _out;
+ _out48: cs = 48; goto _out;
+ _out49: cs = 49; goto _out;
+ _out50: cs = 50; goto _out;
+ _out51: cs = 51; goto _out;
+ _out52: cs = 52; goto _out;
+ _out53: cs = 53; goto _out;
+ _out54: cs = 54; goto _out;
+ _out55: cs = 55; goto _out;
+ _out56: cs = 56; goto _out;
+ _out57: cs = 57; goto _out;
+ _out58: cs = 58; goto _out;
+ _out59: cs = 59; goto _out;
+ _out60: cs = 60; goto _out;
+ _out61: cs = 61; goto _out;
+ _out62: cs = 62; goto _out;
+ _out63: cs = 63; goto _out;
+ _out64: cs = 64; goto _out;
+ _out65: cs = 65; goto _out;
+ _out66: cs = 66; goto _out;
+ _out67: cs = 67; goto _out;
+ _out68: cs = 68; goto _out;
+ _out69: cs = 69; goto _out;
+ _out70: cs = 70; goto _out;
+ _out71: cs = 71; goto _out;
+ _out72: cs = 72; goto _out;
+ _out73: cs = 73; goto _out;
+ _out74: cs = 74; goto _out;
+ _out75: cs = 75; goto _out;
+ _out76: cs = 76; goto _out;
+ _out77: cs = 77; goto _out;
+ _out78: cs = 78; goto _out;
+ _out79: cs = 79; goto _out;
+ _out80: cs = 80; goto _out;
+ _out81: cs = 81; goto _out;
+ _out82: cs = 82; goto _out;
+ _out83: cs = 83; goto _out;
+ _out84: cs = 84; goto _out;
+ _out85: cs = 85; goto _out;
+ _out0: cs = 0; goto _out;
+ _out86: cs = 86; goto _out;
+ _out87: cs = 87; goto _out;
+ _out88: cs = 88; goto _out;
+ _out89: cs = 89; goto _out;
+ _out90: cs = 90; goto _out;
+ _out16: cs = 16; goto _out;
+ _out91: cs = 91; goto _out;
+ _out17: cs = 17; goto _out;
+ _out92: cs = 92; goto _out;
+ _out18: cs = 18; goto _out;
+ _out93: cs = 93; goto _out;
+ _out94: cs = 94; goto _out;
+ _out95: cs = 95; goto _out;
+ _out19: cs = 19; goto _out;
+ _out20: cs = 20; goto _out;
+ _out96: cs = 96; goto _out;
+ _out97: cs = 97; goto _out;
+ _out98: cs = 98; goto _out;
+ _out99: cs = 99; goto _out;
+ _out100: cs = 100; goto _out;
+ _out21: cs = 21; goto _out;
+ _out101: cs = 101; goto _out;
+ _out102: cs = 102; goto _out;
+ _out103: cs = 103; goto _out;
+ _out104: cs = 104; goto _out;
+ _out105: cs = 105; goto _out;
+ _out106: cs = 106; goto _out;
+ _out107: cs = 107; goto _out;
+ _out108: cs = 108; goto _out;
+ _out109: cs = 109; goto _out;
+ _out110: cs = 110; goto _out;
+ _out111: cs = 111; goto _out;
+ _out112: cs = 112; goto _out;
+ _out113: cs = 113; goto _out;
+ _out114: cs = 114; goto _out;
+ _out115: cs = 115; goto _out;
+ _out116: cs = 116; goto _out;
+ _out117: cs = 117; goto _out;
+ _out118: cs = 118; goto _out;
+ _out119: cs = 119; goto _out;
+ _out120: cs = 120; goto _out;
+ _out121: cs = 121; goto _out;
+ _out122: cs = 122; goto _out;
+ _out123: cs = 123; goto _out;
+ _out124: cs = 124; goto _out;
+ _out125: cs = 125; goto _out;
+ _out126: cs = 126; goto _out;
+ _out127: cs = 127; goto _out;
+ _out128: cs = 128; goto _out;
+ _out129: cs = 129; goto _out;
+ _out130: cs = 130; goto _out;
+ _out131: cs = 131; goto _out;
+ _out132: cs = 132; goto _out;
+ _out133: cs = 133; goto _out;
+ _out134: cs = 134; goto _out;
+ _out135: cs = 135; goto _out;
+ _out136: cs = 136; goto _out;
+ _out137: cs = 137; goto _out;
+ _out138: cs = 138; goto _out;
+ _out139: cs = 139; goto _out;
+ _out140: cs = 140; goto _out;
+ _out141: cs = 141; goto _out;
+ _out142: cs = 142; goto _out;
+ _out143: cs = 143; goto _out;
+ _out144: cs = 144; goto _out;
+ _out145: cs = 145; goto _out;
+ _out146: cs = 146; goto _out;
+ _out147: cs = 147; goto _out;
+ _out148: cs = 148; goto _out;
+ _out149: cs = 149; goto _out;
+ _out150: cs = 150; goto _out;
+ _out151: cs = 151; goto _out;
+ _out152: cs = 152; goto _out;
+ _out153: cs = 153; goto _out;
+ _out154: cs = 154; goto _out;
+ _out155: cs = 155; goto _out;
+ _out156: cs = 156; goto _out;
+ _out157: cs = 157; goto _out;
+ _out158: cs = 158; goto _out;
+ _out159: cs = 159; goto _out;
+ _out160: cs = 160; goto _out;
+ _out161: cs = 161; goto _out;
+ _out162: cs = 162; goto _out;
+ _out163: cs = 163; goto _out;
+ _out164: cs = 164; goto _out;
+ _out165: cs = 165; goto _out;
+ _out166: cs = 166; goto _out;
+ _out167: cs = 167; goto _out;
+ _out168: cs = 168; goto _out;
+ _out169: cs = 169; goto _out;
+ _out170: cs = 170; goto _out;
+ _out171: cs = 171; goto _out;
+ _out172: cs = 172; goto _out;
+ _out173: cs = 173; goto _out;
+ _out174: cs = 174; goto _out;
+ _out22: cs = 22; goto _out;
+
+ _out: {}
+ }
+#line 972 "rlscan.rl"
+
+ /* Check if we failed. */
+ if ( cs == rlscan_error ) {
+ /* Machine failed before finding a token. I'm not yet sure if this
+ * is reachable. */
+ scan_error() << "scanner error" << endl;
+ exit(1);
+ }
+
+ /* Decide if we need to preserve anything. */
+ char *preserve = tokstart;
+
+ /* Now set up the prefix. */
+ if ( preserve == 0 )
+ have = 0;
+ else {
+ /* There is data that needs to be shifted over. */
+ have = pe - preserve;
+ memmove( buf, preserve, have );
+ unsigned int shiftback = preserve - buf;
+ if ( tokstart != 0 )
+ tokstart -= shiftback;
+ tokend -= shiftback;
+
+ preserve = buf;
+ }
+ }
+
+ delete[] buf;
+}
+
+/* Free-standing scan entry point. The body is empty: none of the three
+ * arguments is used and nothing is read or written here. */
+void scan( char *fileName, istream &input, ostream &output )
+{
+}
diff --git a/contrib/tools/ragel5/ragel/rlscan.h b/contrib/tools/ragel5/ragel/rlscan.h
new file mode 100644
index 0000000000..e6302aa4c9
--- /dev/null
+++ b/contrib/tools/ragel5/ragel/rlscan.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright 2007 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _RLSCAN_H
+#define _RLSCAN_H
+
+#include <iostream>
+#include "rlscan.h"
+#include "vector.h"
+#include "rlparse.h"
+#include "parsedata.h"
+#include "avltree.h"
+#include "vector.h"
+
+using std::istream;
+using std::ostream;
+
+extern char *Parser_lelNames[];
+
+/* One entry in the stack of include file/machine pairs that are currently
+ * being processed. The stack is used to detect a recursive include
+ * structure. */
+struct IncludeStackItem
+{
+    /* Remember the two names. Only the pointers are stored; the strings
+     * themselves are not copied. */
+    IncludeStackItem( const char *fileName, char *sectionName )
+    {
+        this->fileName = fileName;
+        this->sectionName = sectionName;
+    }
+
+    const char *fileName;
+    char *sectionName;
+};
+
+typedef Vector<IncludeStackItem> IncludeStack;
+
+/* Build the path of an included file by prepending the directory part of abs
+ * (everything up to and including its last '/') to rel.
+ *
+ *  rel: file name as written in the include statement.
+ *  abs: path of the file that contains the include statement.
+ *
+ * Returns a NUL-terminated string allocated with new[]; the caller owns it
+ * and must release it with delete[]. When abs contains no '/', there is no
+ * directory part and the result is a plain copy of rel. */
+inline char* resolvePath(const char* rel, const char* abs) {
+    const size_t relLen = strlen(rel);
+
+    /* strrchr returns a null pointer when abs has no '/'. Doing pointer
+     * arithmetic on that null pointer is undefined behaviour, so treat the
+     * missing slash as an empty directory prefix. */
+    const char* lastSlash = strrchr(abs, '/');
+    const size_t dirLen = lastSlash != 0 ? (size_t)(lastSlash - abs) + 1 : 0;
+
+    char* ret = new char[dirLen + relLen + 1];
+    memcpy(ret, abs, dirLen);
+    /* Copy rel together with its terminating NUL. */
+    memcpy(ret + dirLen, rel, relLen + 1);
+
+    return ret;
+}
+
+/* State for scanning one input file: the input and output streams, the
+ * include context, and the position and token bookkeeping shared by the
+ * scanning routines declared below. */
+struct Scanner
+{
+ /* Store the constructor arguments and reset the counters and flags listed
+ * in the initializer list. Members that do not appear in the list (for
+ * example cs, tokstart and tokend) are left uninitialized here. */
+ Scanner(const char *fileName, istream &input, ostream &output,
+ Parser *inclToParser, char *inclSectionTarg,
+ int includeDepth, bool importMachines )
+ :
+ fileName(fileName), input(input), output(output),
+ inclToParser(inclToParser),
+ inclSectionTarg(inclSectionTarg),
+ includeDepth(includeDepth),
+ importMachines(importMachines),
+ cur_token(0),
+ line(1), column(1), lastnl(0),
+ parser(0), ignoreSection(false),
+ parserExistsError(false),
+ whitespaceOn(true),
+ lastToken(0)
+ {}
+
+ bool recursiveInclude(const char *inclFileName, char *inclSectionName );
+
+ /* Turn the file name of an include statement into a path relative to the
+ * directory of the file being scanned. If the name starts with a double
+ * quote, the character at index len - 1 is overwritten with NUL and the
+ * leading quote is skipped, so the caller's buffer is modified. The
+ * result is allocated with new[] by resolvePath. */
+ char *prepareFileName( char *inclFileName, int len )
+ {
+ if (*inclFileName == '\"') {
+ inclFileName[len - 1] = 0;
+ ++inclFileName;
+ }
+ char* res = resolvePath(inclFileName, fileName); // there was a memory leak in the original too
+ return res;
+ }
+
+ void init();
+ void token( int type, char *start, char *end );
+ void token( int type, char c );
+ void token( int type );
+ void processToken( int type, char *tokdata, int toklen );
+ void directToParser( Parser *toParser, const char *tokFileName, int tokLine,
+ int tokColumn, int type, char *tokdata, int toklen );
+ void flushImport( );
+ void importToken( int type, char *start, char *end );
+ void pass( int token, char *start, char *end );
+ void pass();
+ void updateCol();
+ void startSection();
+ void endSection();
+ void do_scan();
+ bool active();
+ ostream &scan_error();
+
+ /* Values handed to the constructor. */
+ const char *fileName;
+ istream &input;
+ ostream &output;
+ Parser *inclToParser;
+ char *inclSectionTarg;
+ int includeDepth;
+ bool importMachines;
+
+ /* For import parsing. */
+ int tok_cs, tok_act;
+ int *tok_tokstart, *tok_tokend;
+ int cur_token;
+ static const int max_tokens = 32;
+ int token_data[max_tokens];
+ char *token_strings[max_tokens];
+ int token_lens[max_tokens];
+
+ /* For section processing. */
+ int cs;
+ char *word, *lit;
+ int word_len, lit_len;
+
+ /* For character scanning. */
+ int line;
+ InputLoc sectionLoc;
+ char *tokstart, *tokend;
+ int column;
+ char *lastnl;
+
+ /* Set by machine statements, these persist from section to section
+ * allowing for unnamed sections. */
+ Parser *parser;
+ bool ignoreSection;
+ IncludeStack includeStack;
+
+ /* This is set if ragel has already emitted an error stating that
+ * no section name has been seen and thus no parser exists. */
+ bool parserExistsError;
+
+ /* This is for inline code. By default it is on. It goes off for
+ * statements and values in inline blocks which are parsed. */
+ bool whitespaceOn;
+
+ /* Keeps a record of the previous token sent to the section parser. */
+ int lastToken;
+};
+
+#endif /* _RLSCAN_H */
diff --git a/contrib/tools/ragel5/ragel/xmlcodegen.cpp b/contrib/tools/ragel5/ragel/xmlcodegen.cpp
new file mode 100644
index 0000000000..021c97e87d
--- /dev/null
+++ b/contrib/tools/ragel5/ragel/xmlcodegen.cpp
@@ -0,0 +1,713 @@
+/*
+ * Copyright 2005, 2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#include "ragel.h"
+#include "xmlcodegen.h"
+#include "parsedata.h"
+#include "fsmgraph.h"
+#include <string.h>
+
+using namespace std;
+
+/* Set up an XML generator for one machine: remember the machine name, the
+ * parse data, the state machine and the output stream. Action table ids are
+ * handed out starting from zero. */
+XMLCodeGen::XMLCodeGen( char *fsmName, ParseData *pd, FsmAp *fsm, std::ostream &out )
+    : fsmName(fsmName), pd(pd), fsm(fsm), out(out), nextActionTableId(0)
+{}
+
+
+/* Give consecutive ids to every action that is referenced (directly or from
+ * a condition) and write the actions with a non-negative id inside an
+ * <action_list> element. */
+void XMLCodeGen::writeActionList()
+{
+    /* Pass one: number the actions that are in use. */
+    int numActions = 0;
+    for ( ActionList::Iter a = pd->actionList; a.lte(); a++ ) {
+        const bool referenced = a->numRefs() > 0 || a->numCondRefs > 0;
+        if ( referenced ) {
+            a->actionId = numActions;
+            numActions += 1;
+        }
+    }
+
+    /* Pass two: emit the list itself. */
+    out << " <action_list length=\"" << numActions << "\">\n";
+    for ( ActionList::Iter a = pd->actionList; a.lte(); a++ ) {
+        if ( a->actionId < 0 )
+            continue;
+        writeAction( a );
+    }
+    out << " </action_list>\n";
+}
+
+/* Write every reduced action table as a space separated list of action ids,
+ * ordered by table id, inside an <action_table_list> element. */
+void XMLCodeGen::writeActionTableList()
+{
+    /* The map is not ordered by id, so index the tables by id first. */
+    const int count = nextActionTableId;
+    RedActionTable **byId = new RedActionTable*[count];
+    for ( ActionTableMap::Iter el = actionTableMap; el.lte(); el++ )
+        byId[el->id] = el;
+
+    out << " <action_table_list length=\"" << count << "\">\n";
+    for ( int id = 0; id < count; id++ ) {
+        out << " <action_table id=\"" << id << "\" length=\"" <<
+                byId[id]->key.length() << "\">";
+        for ( ActionTable::Iter entry = byId[id]->key; entry.lte(); entry++ ) {
+            out << entry->value->actionId;
+            /* Separate the ids, but put nothing after the final one. */
+            if ( entry.last() )
+                continue;
+            out << " ";
+        }
+        out << "</action_table>\n";
+    }
+    out << " </action_table_list>\n";
+
+    delete[] byId;
+}
+
+/* Collect the distinct, non-empty action tables of the machine into
+ * actionTableMap. A table that is inserted for the first time receives the
+ * next free table id. */
+void XMLCodeGen::reduceActionTables()
+{
+    for ( StateList::Iter state = fsm->stateList; state.lte(); state++ ) {
+        RedActionTable *reduced = 0;
+
+        /* To-state actions. */
+        if ( state->toStateActionTable.length() > 0 &&
+                actionTableMap.insert( state->toStateActionTable, &reduced ) )
+            reduced->id = nextActionTableId++;
+
+        /* From-state actions. */
+        if ( state->fromStateActionTable.length() > 0 &&
+                actionTableMap.insert( state->fromStateActionTable, &reduced ) )
+            reduced->id = nextActionTableId++;
+
+        /* EOF actions. */
+        if ( state->eofActionTable.length() > 0 &&
+                actionTableMap.insert( state->eofActionTable, &reduced ) )
+            reduced->id = nextActionTableId++;
+
+        /* Actions on each outgoing transition. */
+        for ( TransList::Iter tr = state->outList; tr.lte(); tr++ ) {
+            if ( tr->actionTable.length() > 0 &&
+                    actionTableMap.insert( tr->actionTable, &reduced ) )
+                reduced->id = nextActionTableId++;
+        }
+    }
+}
+
+/* Append the transition to outList unless it has neither a target state nor
+ * any actions. */
+void XMLCodeGen::appendTrans( TransListVect &outList, Key lowKey,
+        Key highKey, TransAp *trans )
+{
+    /* Nothing to record for a transition with no target and no actions. */
+    if ( trans->toState == 0 && !( trans->actionTable.length() > 0 ) )
+        return;
+
+    outList.append( TransEl( lowKey, highKey, trans ) );
+}
+
+/* Write the numeric value of a key. When the alphabet is not signed the
+ * value is cast to unsigned long before it is written. */
+void XMLCodeGen::writeKey( Key key )
+{
+    if ( !keyOps->isSigned ) {
+        out << (unsigned long) key.getVal();
+        return;
+    }
+
+    out << key.getVal();
+}
+
+/* Write one transition as "<t>low high target actions</t>". A missing target
+ * state or a missing action table is written as "x". */
+void XMLCodeGen::writeTrans( Key lowKey, Key highKey, TransAp *trans )
+{
+    /* Look up the reduced action table, if the transition has actions. */
+    RedActionTable *reduced = 0;
+    if ( trans->actionTable.length() > 0 )
+        reduced = actionTableMap.find( trans->actionTable );
+
+    /* Key range. */
+    out << " <t>";
+    writeKey( lowKey );
+    out << " ";
+    writeKey( highKey );
+
+    /* Target state. */
+    if ( trans->toState == 0 )
+        out << " x";
+    else
+        out << " " << trans->toState->alg.stateNum;
+
+    /* Action table id. */
+    if ( reduced == 0 )
+        out << " x";
+    else
+        out << " " << reduced->id;
+
+    out << "</t>\n";
+}
+
+/* Write the <trans_list> element of a state. Only the transitions accepted by
+ * appendTrans (those with a target state or with actions) are listed. */
+void XMLCodeGen::writeTransList( StateAp *state )
+{
+    TransListVect kept;
+
+    /* Gather the transitions worth writing from the state's out list. */
+    if ( state->outList.length() > 0 ) {
+        for ( TransList::Iter tr = state->outList; tr.lte(); tr++ )
+            appendTrans( kept, tr->lowKey, tr->highKey, tr );
+    }
+
+    out << " <trans_list length=\"" << kept.length() << "\">\n";
+    for ( TransListVect::Iter el = kept; el.lte(); el++ )
+        writeTrans( el->lowKey, el->highKey, el->value );
+    out << " </trans_list>\n";
+}
+
+/* Write the <lm_switch> element of a longest-match switch item. Every part
+ * that is in the select and has an action becomes a <sub_action>; the switch
+ * is followed by an exec of get_tokend. */
+void XMLCodeGen::writeLmSwitch( InlineItem *item )
+{
+    LongestMatch *lm = item->longestMatch;
+
+    out << "<lm_switch";
+    if ( lm->lmSwitchHandlesError )
+        out << " handles_error=\"t\"";
+    out << ">\n";
+
+    for ( LmPartList::Iter part = *lm->longestMatchList; part.lte(); part++ ) {
+        if ( !part->inLmSelect || part->action == 0 )
+            continue;
+
+        /* The switch item is passed as context so that control flow
+         * statements inside the action are written relative to it. */
+        out << " <sub_action id=\"" << part->longestMatchId << "\">";
+        writeInlineList( part->action->inlineList, item );
+        out << "</sub_action>\n";
+    }
+
+    out << " </lm_switch><exec><get_tokend></get_tokend></exec>";
+}
+
+/* Write a text item with XML escaping. Neighbouring text items share a single
+ * <text> element: the opening tag is written by the first item of a run and
+ * the closing tag by the last one. */
+void XMLCodeGen::writeText( InlineItem *item )
+{
+    const bool startsRun = item->prev == 0 || item->prev->type != InlineItem::Text;
+    if ( startsRun )
+        out << "<text>";
+
+    xmlEscapeHost( out, item->data, strlen(item->data) );
+
+    const bool endsRun = item->next == 0 || item->next->type != InlineItem::Text;
+    if ( endsRun )
+        out << "</text>";
+}
+
+/* Write a control flow item (goto, call, next, break, ret and the expression
+ * forms). When a context item is given the output is wrapped in a
+ * <sub_action>, and for the longest-match contexts it is preceded by an exec
+ * of get_tokend. */
+void XMLCodeGen::writeCtrlFlow( InlineItem *item, InlineItem *context )
+{
+    const bool wrapped = context != 0;
+
+    if ( wrapped ) {
+        out << "<sub_action>";
+
+        /* All four longest-match contexts emit the same prefix. */
+        switch ( context->type ) {
+        case InlineItem::LmOnLast:
+        case InlineItem::LmOnNext:
+        case InlineItem::LmOnLagBehind:
+        case InlineItem::LmSwitch:
+            out << "<exec><get_tokend></get_tokend></exec>";
+            break;
+        default:
+            break;
+        }
+    }
+
+    switch ( item->type ) {
+    case InlineItem::Goto:     writeGoto( item, context ); break;
+    case InlineItem::GotoExpr: writeGotoExpr( item, context ); break;
+    case InlineItem::Call:     writeCall( item, context ); break;
+    case InlineItem::CallExpr: writeCallExpr( item, context ); break;
+    case InlineItem::Next:     writeNext( item, context ); break;
+    case InlineItem::NextExpr: writeNextExpr( item, context ); break;
+    case InlineItem::Break:    out << "<break></break>"; break;
+    case InlineItem::Ret:      out << "<ret></ret>"; break;
+    default: break;
+    }
+
+    if ( wrapped )
+        out << "</sub_action>";
+}
+
+/* Write a hold or exec item. Inside an LmOnNext, LmOnLagBehind or LmSwitch
+ * context the "te" variants (<holdte>, <execte>) are written, otherwise the
+ * plain ones. Other item types produce no output. */
+void XMLCodeGen::writePtrMod( InlineItem *item, InlineItem *context )
+{
+    const bool tokendVariant = context != 0 && (
+            context->type == InlineItem::LmOnNext ||
+            context->type == InlineItem::LmOnLagBehind ||
+            context->type == InlineItem::LmSwitch );
+
+    switch ( item->type ) {
+    case InlineItem::Hold:
+        if ( tokendVariant )
+            out << "<holdte></holdte>";
+        else
+            out << "<hold></hold>";
+        break;
+    case InlineItem::Exec:
+        if ( tokendVariant )
+            writeActionExecTE( item );
+        else
+            writeActionExec( item );
+        break;
+    default:
+        break;
+    }
+}
+
+
+/* Write a <goto> element holding the state number of the item's target entry
+ * point, or -1 when only a subset of the section is being generated. The
+ * context argument is not used. */
+void XMLCodeGen::writeGoto( InlineItem *item, InlineItem *context )
+{
+    if ( pd->generatingSectionSubset ) {
+        out << "<goto>-1</goto>";
+        return;
+    }
+
+    EntryMapEl *entry = fsm->entryPoints.find( item->nameTarg->id );
+    out << "<goto>" << entry->value->alg.stateNum << "</goto>";
+}
+
+/* Write a <call> element holding the state number of the item's target entry
+ * point, or -1 when only a subset of the section is being generated. The
+ * context argument is not used. */
+void XMLCodeGen::writeCall( InlineItem *item, InlineItem *context )
+{
+    if ( pd->generatingSectionSubset ) {
+        out << "<call>-1</call>";
+        return;
+    }
+
+    EntryMapEl *entry = fsm->entryPoints.find( item->nameTarg->id );
+    out << "<call>" << entry->value->alg.stateNum << "</call>";
+}
+
+/* Write a <next> element holding the state number of the item's target entry
+ * point, or -1 when only a subset of the section is being generated. The
+ * context argument is not used. */
+void XMLCodeGen::writeNext( InlineItem *item, InlineItem *context )
+{
+    if ( pd->generatingSectionSubset ) {
+        out << "<next>-1</next>";
+        return;
+    }
+
+    EntryMapEl *entry = fsm->entryPoints.find( item->nameTarg->id );
+    out << "<next>" << entry->value->alg.stateNum << "</next>";
+}
+
+/* Write the item's child inline list wrapped in a <goto_expr> element. The
+ * children are written without a context; the context argument is unused. */
+void XMLCodeGen::writeGotoExpr( InlineItem *item, InlineItem *context )
+{
+ out << "<goto_expr>";
+ writeInlineList( item->children, 0 );
+ out << "</goto_expr>";
+}
+
+/* Write the item's child inline list wrapped in a <call_expr> element. The
+ * children are written without a context; the context argument is unused. */
+void XMLCodeGen::writeCallExpr( InlineItem *item, InlineItem *context )
+{
+ out << "<call_expr>";
+ writeInlineList( item->children, 0 );
+ out << "</call_expr>";
+}
+
+/* Write the item's child inline list wrapped in a <next_expr> element. The
+ * children are written without a context; the context argument is unused. */
+void XMLCodeGen::writeNextExpr( InlineItem *item, InlineItem *context )
+{
+ out << "<next_expr>";
+ writeInlineList( item->children, 0 );
+ out << "</next_expr>";
+}
+
+/* Write an <entry> element holding the state number of the item's target
+ * entry point, or -1 when only a subset of the section is being generated. */
+void XMLCodeGen::writeEntry( InlineItem * item )
+{
+    if ( pd->generatingSectionSubset ) {
+        out << "<entry>-1</entry>";
+        return;
+    }
+
+    EntryMapEl *entry = fsm->entryPoints.find( item->nameTarg->id );
+    out << "<entry>" << entry->value->alg.stateNum << "</entry>";
+}
+
+/* Write the item's child inline list wrapped in an <exec> element. The
+ * children are written without a context. */
+void XMLCodeGen::writeActionExec( InlineItem *item )
+{
+ out << "<exec>";
+ writeInlineList( item->children, 0 );
+ out << "</exec>";
+}
+
+/* Write the item's child inline list wrapped in an <execte> element. The
+ * children are written without a context. */
+void XMLCodeGen::writeActionExecTE( InlineItem *item )
+{
+ out << "<execte>";
+ writeInlineList( item->children, 0 );
+ out << "</execte>";
+}
+
+/* Write an LmOnLast item: set_tokend 1, then the action of the longest-match
+ * part (if it has one) as a <sub_action>, then an exec of get_tokend. */
+void XMLCodeGen::writeLmOnLast( InlineItem *item )
+{
+    out << "<set_tokend>1</set_tokend>";
+
+    if ( item->longestMatchPart->action != 0 ) {
+        /* The item itself is the context for the action's inline code. */
+        InlineList *actionCode = item->longestMatchPart->action->inlineList;
+        out << "<sub_action>";
+        writeInlineList( actionCode, item );
+        out << "</sub_action>";
+    }
+
+    out << "<exec><get_tokend></get_tokend></exec>";
+}
+
+/* Write an LmOnNext item: set_tokend 0, then the action of the longest-match
+ * part (if it has one) as a <sub_action>, then an exec of get_tokend. */
+void XMLCodeGen::writeLmOnNext( InlineItem *item )
+{
+    out << "<set_tokend>0</set_tokend>";
+
+    if ( item->longestMatchPart->action != 0 ) {
+        /* The item itself is the context for the action's inline code. */
+        InlineList *actionCode = item->longestMatchPart->action->inlineList;
+        out << "<sub_action>";
+        writeInlineList( actionCode, item );
+        out << "</sub_action>";
+    }
+
+    out << "<exec><get_tokend></get_tokend></exec>";
+}
+
+/* Write an LmOnLagBehind item: the action of the longest-match part (if it
+ * has one) as a <sub_action>, followed by an exec of get_tokend. */
+void XMLCodeGen::writeLmOnLagBehind( InlineItem *item )
+{
+    if ( item->longestMatchPart->action != 0 ) {
+        /* The item itself is the context for the action's inline code. */
+        InlineList *actionCode = item->longestMatchPart->action->inlineList;
+        out << "<sub_action>";
+        writeInlineList( actionCode, item );
+        out << "</sub_action>";
+    }
+
+    out << "<exec><get_tokend></get_tokend></exec>";
+}
+
+
+/* Write every item of an inline list in order, dispatching on the item type.
+ * The context is forwarded to the control flow and pointer modification
+ * writers; item types without a case produce no output. */
+void XMLCodeGen::writeInlineList( InlineList *inlineList, InlineItem *context )
+{
+    for ( InlineList::Iter it = *inlineList; it.lte(); it++ ) {
+        switch ( it->type ) {
+        /* Literal host code. */
+        case InlineItem::Text:
+            writeText( it );
+            break;
+
+        /* Control flow. */
+        case InlineItem::Goto:
+        case InlineItem::GotoExpr:
+        case InlineItem::Call:
+        case InlineItem::CallExpr:
+        case InlineItem::Next:
+        case InlineItem::NextExpr:
+        case InlineItem::Break:
+        case InlineItem::Ret:
+            writeCtrlFlow( it, context );
+            break;
+
+        /* Items written as empty elements. */
+        case InlineItem::PChar:
+            out << "<pchar></pchar>";
+            break;
+        case InlineItem::Char:
+            out << "<char></char>";
+            break;
+        case InlineItem::Curs:
+            out << "<curs></curs>";
+            break;
+        case InlineItem::Targs:
+            out << "<targs></targs>";
+            break;
+
+        case InlineItem::Entry:
+            writeEntry( it );
+            break;
+
+        /* Pointer modification. */
+        case InlineItem::Hold:
+        case InlineItem::Exec:
+            writePtrMod( it, context );
+            break;
+
+        /* Longest-match support. */
+        case InlineItem::LmSwitch:
+            writeLmSwitch( it );
+            break;
+        case InlineItem::LmSetActId:
+            out << "<set_act>" << it->longestMatchPart->longestMatchId << "</set_act>";
+            break;
+        case InlineItem::LmSetTokEnd:
+            out << "<set_tokend>1</set_tokend>";
+            break;
+        case InlineItem::LmOnLast:
+            writeLmOnLast( it );
+            break;
+        case InlineItem::LmOnNext:
+            writeLmOnNext( it );
+            break;
+        case InlineItem::LmOnLagBehind:
+            writeLmOnLagBehind( it );
+            break;
+        case InlineItem::LmInitAct:
+            out << "<init_act></init_act>";
+            break;
+        case InlineItem::LmInitTokStart:
+            out << "<init_tokstart></init_tokstart>";
+            break;
+        case InlineItem::LmSetTokStart:
+            out << "<set_tokstart></set_tokstart>";
+            break;
+        }
+    }
+}
+
+/* Write one <action> element: its id, its name when it has one, its source
+ * line and column, and its inline code written without a context. */
+void XMLCodeGen::writeAction( Action *action )
+{
+    /* Opening tag with attributes. */
+    out << " <action id=\"" << action->actionId << "\"";
+    if ( action->name != 0 ) {
+        out << " name=\"" << action->name << "\"";
+    }
+    out << " line=\"" << action->loc.line
+        << "\" col=\"" << action->loc.col << "\">";
+
+    /* Body and closing tag. */
+    writeInlineList( action->inlineList, 0 );
+    out << "</action>\n";
+}
+
+/* Copy len characters starting at data to out, replacing '<', '>' and '&'
+ * with the entities &lt;, &gt; and &amp;. All other characters are written
+ * unchanged. */
+void xmlEscapeHost( std::ostream &out, char *data, int len )
+{
+    for ( char *cur = data, *stop = data + len; cur != stop; ++cur ) {
+        const char c = *cur;
+        if ( c == '<' )
+            out << "&lt;";
+        else if ( c == '>' )
+            out << "&gt;";
+        else if ( c == '&' )
+            out << "&amp;";
+        else
+            out << c;
+    }
+}
+
+/* Write the <state_actions> element of a state: the ids of its to-state,
+ * from-state and EOF action tables, with "x" for each table the state does
+ * not have. Nothing is written when the state has none of the three. */
+void XMLCodeGen::writeStateActions( StateAp *state )
+{
+    /* Look up the reduced table for each non-empty action table. */
+    RedActionTable *toState = 0;
+    if ( state->toStateActionTable.length() > 0 )
+        toState = actionTableMap.find( state->toStateActionTable );
+
+    RedActionTable *fromState = 0;
+    if ( state->fromStateActionTable.length() > 0 )
+        fromState = actionTableMap.find( state->fromStateActionTable );
+
+    RedActionTable *eof = 0;
+    if ( state->eofActionTable.length() > 0 )
+        eof = actionTableMap.find( state->eofActionTable );
+
+    if ( toState == 0 && fromState == 0 && eof == 0 )
+        return;
+
+    out << " <state_actions>";
+
+    if ( toState == 0 )
+        out << "x";
+    else
+        out << toState->id;
+
+    if ( fromState == 0 )
+        out << " x";
+    else
+        out << " " << fromState->id;
+
+    if ( eof == 0 )
+        out << " x";
+    else
+        out << " " << eof->id;
+
+    /* The closing tag is written in every case. */
+    out << "</state_actions>\n";
+}
+
+/* Write the <cond_list> element of a state: one <c> per state condition with
+ * its low key, high key and condition space id. States without conditions
+ * produce no output. */
+void XMLCodeGen::writeStateConditions( StateAp *state )
+{
+    if ( !( state->stateCondList.length() > 0 ) )
+        return;
+
+    out << " <cond_list length=\"" << state->stateCondList.length() << "\">\n";
+    for ( StateCondList::Iter cond = state->stateCondList; cond.lte(); cond++ ) {
+        out << " <c>";
+        writeKey( cond->lowKey );
+        out << " ";
+        writeKey( cond->highKey );
+        out << " " << cond->condSpace->condSpaceId << "</c>\n";
+    }
+    out << " </cond_list>\n";
+}
+
+/* Write the <state_list> element: one <state> per state with its actions,
+ * conditions and transitions. States are separated by a blank line. */
+void XMLCodeGen::writeStateList()
+{
+    out << " <state_list length=\"" << fsm->stateList.length() << "\">\n";
+
+    for ( StateList::Iter state = fsm->stateList; state.lte(); state++ ) {
+        /* Opening tag; final states carry an extra attribute. */
+        out << " <state id=\"" << state->alg.stateNum << "\"";
+        if ( state->isFinState() ) {
+            out << " final=\"t\"";
+        }
+        out << ">\n";
+
+        writeStateActions( state );
+        writeStateConditions( state );
+        writeTransList( state );
+
+        out << " </state>\n";
+
+        /* No separator after the last state. */
+        if ( state.last() )
+            continue;
+        out << "\n";
+    }
+
+    out << " </state_list>\n";
+}
+
+/* Write the qualified name of a name instantiation: the names of its
+ * ancestors, outermost first, and then its own name, joined by '_'. Unnamed
+ * levels are skipped. Returns true when at least one name was written. */
+bool XMLCodeGen::writeNameInst( NameInst *nameInst )
+{
+    /* Ancestors come first. */
+    const bool parentWritten =
+            nameInst->parent != 0 ? writeNameInst( nameInst->parent ) : false;
+
+    if ( nameInst->name == 0 )
+        return parentWritten;
+
+    if ( parentWritten )
+        out << '_';
+    out << nameInst->name;
+    return true;
+}
+
+/* Write the <entry_points> element listing each entry point by qualified name
+ * and state number. The element is omitted when there are no entry points
+ * and the longest-match code does not require an error state. */
+void XMLCodeGen::writeEntryPoints()
+{
+    if ( !( fsm->entryPoints.length() > 0 ) && !pd->lmRequiresErrorState )
+        return;
+
+    out << " <entry_points";
+    if ( pd->lmRequiresErrorState ) {
+        out << " error=\"t\"";
+    }
+    out << ">\n";
+
+    for ( EntryMap::Iter entry = fsm->entryPoints; entry.lte(); entry++ ) {
+        /* The entry key indexes the name instantiation table. */
+        NameInst *inst = pd->nameIndex[entry->key];
+        StateAp *target = entry->value;
+
+        out << " <entry name=\"";
+        writeNameInst( inst );
+        out << "\">" << target->alg.stateNum << "</entry>\n";
+    }
+
+    out << " </entry_points>\n";
+}
+
+/* Write the <machine> element: action list, action tables, conditions, the
+ * start state (only when a main graph exists), the error state (when the
+ * machine has one), the entry points and the state list. */
+void XMLCodeGen::writeMachine()
+{
+    out << " <machine>\n";
+
+    /* The action tables must be reduced before anything refers to them. */
+    reduceActionTables();
+
+    writeActionList();
+    writeActionTableList();
+    writeConditions();
+
+    /* Start state. */
+    if ( pd->graphDict.find( mainMachine ) != 0 ) {
+        out << " <start_state>" << fsm->startState->alg.stateNum
+            << "</start_state>\n";
+    }
+
+    /* Error state. */
+    if ( fsm->errState != 0 ) {
+        out << " <error_state>" << fsm->errState->alg.stateNum
+            << "</error_state>\n";
+    }
+
+    writeEntryPoints();
+    writeStateList();
+
+    out << " </machine>\n";
+}
+
+/* Write the <alphtype> element. Its content is the difference between
+ * keyOps->alphType and hostLang->hostTypes (presumably the index of the
+ * alphabet type within the host language's type table -- confirm). */
+void XMLCodeGen::writeAlphType()
+{
+ out << " <alphtype>" <<
+ (keyOps->alphType - hostLang->hostTypes) << "</alphtype>\n";
+}
+
+/* Write the getkey expression of the parse data inside a <getkey> element.
+ * The caller is expected to check that pd->getKeyExpr is set. */
+void XMLCodeGen::writeGetKeyExpr()
+{
+ out << " <getkey>";
+ writeInlineList( pd->getKeyExpr, 0 );
+ out << "</getkey>\n";
+}
+
+/* Write the access expression of the parse data inside an <access> element.
+ * The caller is expected to check that pd->accessExpr is set. */
+void XMLCodeGen::writeAccessExpr()
+{
+ out << " <access>";
+ writeInlineList( pd->accessExpr, 0 );
+ out << "</access>\n";
+}
+
+/* Write the current-state expression of the parse data inside a <curstate>
+ * element. The caller is expected to check that pd->curStateExpr is set. */
+void XMLCodeGen::writeCurStateExpr()
+{
+ out << " <curstate>";
+ writeInlineList( pd->curStateExpr, 0 );
+ out << "</curstate>\n";
+}
+
+/* Number the condition spaces and write them as a <cond_space_list>. Each
+ * <cond_space> holds its base key followed by the action ids of its condition
+ * set. Nothing is written when there are no condition spaces. */
+void XMLCodeGen::writeConditions()
+{
+    if ( !( condData->condSpaceMap.length() > 0 ) )
+        return;
+
+    /* Assign consecutive ids in map order. */
+    long nextId = 0;
+    for ( CondSpaceMap::Iter space = condData->condSpaceMap; space.lte(); space++ ) {
+        space->condSpaceId = nextId;
+        nextId += 1;
+    }
+
+    out << " <cond_space_list length=\"" << condData->condSpaceMap.length() << "\">\n";
+    for ( CondSpaceMap::Iter space = condData->condSpaceMap; space.lte(); space++ ) {
+        out << " <cond_space id=\"" << space->condSpaceId
+            << "\" length=\"" << space->condSet.length() << "\">";
+        writeKey( space->baseKey );
+        for ( CondSet::Iter cond = space->condSet; cond.lte(); cond++ ) {
+            out << " " << (*cond)->actionId;
+        }
+        out << "</cond_space>\n";
+    }
+    out << " </cond_space_list>\n";
+}
+
+/* Write the <exports> element with one <ex> per exported name and its key.
+ * Nothing is written when the export list is empty. */
+void XMLCodeGen::writeExports()
+{
+    if ( !( pd->exportList.length() > 0 ) )
+        return;
+
+    out << " <exports>\n";
+    for ( ExportList::Iter ex = pd->exportList; ex.lte(); ex++ ) {
+        out << " <ex name=\"" << ex->name << "\">";
+        writeKey( ex->key );
+        out << "</ex>\n";
+    }
+    out << " </exports>\n";
+}
+
+/* Write the complete <ragel_def> element for this machine: alphabet type,
+ * the optional getkey/access/curstate expressions, the exports and the
+ * machine itself. */
+void XMLCodeGen::writeXML()
+{
+    out << "<ragel_def name=\"" << fsmName << "\">\n";
+
+    writeAlphType();
+
+    /* Each optional expression is written only when it has been set. */
+    if ( pd->getKeyExpr != 0 ) {
+        writeGetKeyExpr();
+    }
+    if ( pd->accessExpr != 0 ) {
+        writeAccessExpr();
+    }
+    if ( pd->curStateExpr != 0 ) {
+        writeCurStateExpr();
+    }
+
+    writeExports();
+    writeMachine();
+
+    out << "</ragel_def>\n";
+}
+
diff --git a/contrib/tools/ragel5/ragel/xmlcodegen.h b/contrib/tools/ragel5/ragel/xmlcodegen.h
new file mode 100644
index 0000000000..99b985395a
--- /dev/null
+++ b/contrib/tools/ragel5/ragel/xmlcodegen.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright 2005, 2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _XMLDOTGEN_H
+#define _XMLDOTGEN_H
+
+#include <iostream>
+#include "avltree.h"
+#include "fsmgraph.h"
+#include "parsedata.h"
+
+/* Forwards. */
+struct TransAp;
+struct FsmAp;
+struct ParseData;
+
+/* Element of the action table map: one distinct action table (the key)
+ * together with the id assigned to it. */
+struct RedActionTable
+:
+ public AvlTreeEl<RedActionTable>
+{
+ /* Store a copy of the table as the key; the id starts at zero. */
+ RedActionTable( const ActionTable &key )
+ :
+ key(key),
+ id(0)
+ { }
+
+ /* Accessor for the stored action table. */
+ const ActionTable &getKey()
+ { return key; }
+
+ ActionTable key;
+ int id;
+};
+
+typedef AvlTree<RedActionTable, ActionTable, CmpActionTable> ActionTableMap;
+
+/* Cursor over a chain of transitions linked through TransAp::next. It caches
+ * the key range of the current transition and the pointer to the one after
+ * it. */
+struct NextRedTrans
+{
+    Key lowKey, highKey;
+    TransAp *trans;
+    TransAp *next;
+
+    /* Refresh the cached fields from the current transition. When there is
+     * no current transition, next is cleared so that it never holds an
+     * indeterminate value (the keys are left as they were). */
+    void load() {
+        if ( trans != 0 ) {
+            next = trans->next;
+            lowKey = trans->lowKey;
+            highKey = trans->highKey;
+        }
+        else {
+            next = 0;
+        }
+    }
+
+    /* Start at transition t, which may be null. */
+    NextRedTrans( TransAp *t ) {
+        trans = t;
+        load();
+    }
+
+    /* Step to the following transition; trans becomes null at the end. */
+    void increment() {
+        trans = next;
+        load();
+    }
+};
+
+/* Writes the XML description (<ragel_def>) of one state machine to an output
+ * stream. writeXML is the only public operation; the private members write
+ * the individual elements. */
+class XMLCodeGen
+{
+public:
+ XMLCodeGen( char *fsmName, ParseData *pd, FsmAp *fsm, std::ostream &out );
+ void writeXML( );
+
+private:
+ /* States and transitions. */
+ void appendTrans( TransListVect &outList, Key lowKey, Key highKey, TransAp *trans );
+ void writeStateActions( StateAp *state );
+ void writeStateList();
+ void writeStateConditions( StateAp *state );
+
+ /* Inline code items. */
+ void writeKey( Key key );
+ void writeText( InlineItem *item );
+ void writeCtrlFlow( InlineItem *item, InlineItem *context );
+ void writePtrMod( InlineItem *item, InlineItem *context );
+ void writeGoto( InlineItem *item, InlineItem *context );
+ void writeGotoExpr( InlineItem *item, InlineItem *context );
+ void writeCall( InlineItem *item, InlineItem *context );
+ void writeCallExpr( InlineItem *item, InlineItem *context );
+ void writeNext( InlineItem *item, InlineItem *context );
+ void writeNextExpr( InlineItem *item, InlineItem *context );
+ void writeEntry( InlineItem *item );
+ /* NOTE(review): no definition of writeLmSetActId appears in
+ * xmlcodegen.cpp -- confirm whether this declaration is still needed. */
+ void writeLmSetActId( InlineItem *item );
+ void writeLmOnLast( InlineItem *item );
+ void writeLmOnNext( InlineItem *item );
+ void writeLmOnLagBehind( InlineItem *item );
+
+ /* Top-level elements of the definition. */
+ void writeExports();
+ bool writeNameInst( NameInst *nameInst );
+ void writeEntryPoints();
+ void writeGetKeyExpr();
+ void writeAccessExpr();
+ void writeCurStateExpr();
+ void writeConditions();
+ void writeInlineList( InlineList *inlineList, InlineItem *context );
+ void writeAlphType();
+ void writeActionList();
+ void writeActionTableList();
+ /* NOTE(review): no definition of reduceTrans appears in xmlcodegen.cpp --
+ * confirm whether this declaration is still needed. */
+ void reduceTrans( TransAp *trans );
+ void reduceActionTables();
+ void writeTransList( StateAp *state );
+ void writeTrans( Key lowKey, Key highKey, TransAp *defTrans );
+ void writeAction( Action *action );
+ void writeLmSwitch( InlineItem *item );
+ void writeMachine();
+ void writeActionExec( InlineItem *item );
+ void writeActionExecTE( InlineItem *item );
+
+ /* Name of the machine, written as the name attribute of <ragel_def>. */
+ char *fsmName;
+ ParseData *pd;
+ FsmAp *fsm;
+ /* Destination of all generated XML. */
+ std::ostream &out;
+ /* Distinct action tables found by reduceActionTables, and the next id to
+ * hand out to a newly inserted table. */
+ ActionTableMap actionTableMap;
+ int nextActionTableId;
+};
+
+
+#endif /* _XMLDOTGEN_H */
diff --git a/contrib/tools/ragel5/ragel/ya.make b/contrib/tools/ragel5/ragel/ya.make
new file mode 100644
index 0000000000..6966321b7c
--- /dev/null
+++ b/contrib/tools/ragel5/ragel/ya.make
@@ -0,0 +1,26 @@
+# Build description for the ragel5 program, compiled from the sources listed
+# in SRCS below and linked against the two ragel5 peer directories.
+PROGRAM(ragel5)
+
+# NOTE(review): presumably these drop the default util dependency and silence
+# compiler warnings for this contrib code; confirm against the ya.make docs.
+NO_UTIL()
+NO_COMPILER_WARNINGS()
+
+PEERDIR(
+ contrib/tools/ragel5/aapl
+ contrib/tools/ragel5/common
+)
+
+SRCS(
+ fsmap.cpp
+ fsmattach.cpp
+ fsmbase.cpp
+ fsmgraph.cpp
+ fsmmin.cpp
+ fsmstate.cpp
+ main.cpp
+ parsedata.cpp
+ parsetree.cpp
+ rlparse.cpp
+ rlscan.cpp
+ xmlcodegen.cpp
+)
+
+END()
diff --git a/contrib/tools/ragel5/redfsm/gendata.cpp b/contrib/tools/ragel5/redfsm/gendata.cpp
new file mode 100644
index 0000000000..b0893ccdc2
--- /dev/null
+++ b/contrib/tools/ragel5/redfsm/gendata.cpp
@@ -0,0 +1,717 @@
+/*
+ * Copyright 2005-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "gendata.h"
+#include <iostream>
+
+using std::cerr;
+using std::endl;
+
+/* Build an empty code generation context that writes generated code to
+ * `out`. All pointer members start out null, the start and error state
+ * offsets start at -1 (initStateList() only resolves offsets >= 0) and the
+ * four write options default to true. */
+CodeGenData::CodeGenData( ostream &out )
+:
+    sourceFileName(0),
+    fsmName(0),
+    out(out),
+    redFsm(0),
+    allActions(0),
+    allActionTables(0),
+    allConditions(0),
+    allCondSpaces(0),
+    allStates(0),
+    nameIndex(0),
+    startState(-1),
+    errState(-1),
+    getKeyExpr(0),
+    accessExpr(0),
+    curStateExpr(0),
+    /* wantComplete is a bool; spell its initial value like the other bools. */
+    wantComplete(false),
+    hasLongestMatch(false),
+    codeGenErrCount(0),
+    hasEnd(true),
+    dataPrefix(true),
+    writeFirstFinal(true),
+    writeErr(true)
+{}
+
+
+/* Allocate a fresh, empty RedFsmAp and store it in redFsm. Any previous
+ * value of redFsm is overwritten without being freed. */
+void CodeGenData::createMachine()
+{
+ redFsm = new RedFsmAp();
+}
+
+/* Allocate the array holding `length` actions and put every element, in
+ * index order, on actionList. */
+void CodeGenData::initActionList( unsigned long length )
+{
+    allActions = new Action[length];
+
+    unsigned long idx = 0;
+    while ( idx < length ) {
+        actionList.append( allActions + idx );
+        idx += 1;
+    }
+}
+
+/* Fill in slot `anum` of allActions (allocated by initActionList()) with
+ * the action's id, name, source location and inline code list. */
+void CodeGenData::newAction( int anum, char *name, int line,
+        int col, InlineList *inlineList )
+{
+    Action &slot = allActions[anum];
+
+    slot.actionId = anum;
+    slot.name = name;
+    slot.loc.line = line;
+    slot.loc.col = col;
+    slot.inlineList = inlineList;
+}
+
+/* Allocate the array of `length` reduced action tables. Other members index
+ * into it by offset (see newTrans() and setStateActions()). */
+void CodeGenData::initActionTableList( unsigned long length )
+{
+ allActionTables = new RedAction[length];
+}
+
+/* Allocate `length` states, add them to the machine's state list in index
+ * order and turn the previously recorded start state, error state and entry
+ * point offsets into pointers into the new array. */
+void CodeGenData::initStateList( unsigned long length )
+{
+    allStates = new RedStateAp[length];
+    for ( unsigned long idx = 0; idx != length; idx += 1 )
+        redFsm->stateList.append( allStates + idx );
+
+    /* The start and error states were given as offsets; negative means the
+     * offset was never set, in which case the pointer is left alone. */
+    if ( startState >= 0 )
+        redFsm->startState = allStates + startState;
+    if ( errState >= 0 )
+        redFsm->errState = allStates + errState;
+
+    /* Same conversion for every recorded entry point. */
+    for ( EntryIdVect::Iter entry = entryPointIds; entry.lte(); entry++ )
+        redFsm->entryPoints.insert( allStates + *entry );
+
+    /* State ids are now assigned by the frontend, so nextStateId no longer
+     * hands them out. Generation code still reads it though, so keep it in
+     * step with the number of states. Should eventually be removed. */
+    redFsm->nextStateId = redFsm->stateList.length();
+}
+
+/* Record the start state as an offset. It is resolved to a pointer later by
+ * initStateList(). Note the unsigned long argument is stored in an int
+ * member. */
+void CodeGenData::setStartState( unsigned long startState )
+{
+ this->startState = startState;
+}
+
+/* Record the error state as an offset. It is resolved to a pointer later by
+ * initStateList(). Note the unsigned long argument is stored in an int
+ * member. */
+void CodeGenData::setErrorState( unsigned long errState )
+{
+ this->errState = errState;
+}
+
+/* Remember a named entry point. The state offset and the name are appended
+ * to two parallel vectors, so the same index addresses both. */
+void CodeGenData::addEntryPoint( char *name, unsigned long entryState )
+{
+ entryPointIds.append( entryState );
+ entryPointNames.append( name );
+}
+
+/* Hook invoked with the number of transitions of state `snum`. Currently a
+ * no-op: both arguments are ignored. */
+void CodeGenData::initTransList( int snum, unsigned long length )
+{
+ /* Could preallocate the out range to save time growing it. For now do
+ * nothing. */
+}
+
+/* Append the transition covering [lowKey, highKey] to the range list of
+ * state `snum`.
+ *
+ *  snum    offset of the source state in allStates.
+ *  tnum    not used by this function.
+ *  targ    offset of the target state; when negative the target is the
+ *          machine's error state if wantComplete is set, otherwise null.
+ *  action  offset into allActionTables, or negative for no action.
+ *
+ * Nothing is added when the source state is the error state. When
+ * wantComplete is set, a gap between the previous range (or the alphabet
+ * minimum, for the first range) and lowKey is first filled with the error
+ * transition. */
+void CodeGenData::newTrans( int snum, int tnum, Key lowKey,
+        Key highKey, long targ, long action )
+{
+    /* Get the current state and range. */
+    RedStateAp *curState = allStates + snum;
+    RedTransList &destRange = curState->outRange;
+
+    if ( curState == redFsm->errState )
+        return;
+
+    /* Make the new transition. */
+    RedStateAp *targState = targ >= 0 ? (allStates + targ) :
+            wantComplete ? redFsm->getErrorState() : 0;
+    RedAction *actionTable = action >= 0 ? (allActionTables + action) : 0;
+    RedTransAp *trans = redFsm->allocateTrans( targState, actionTable );
+    RedTransEl transEl( lowKey, highKey, trans );
+
+    if ( wantComplete ) {
+        /* If the machine is to be complete then we need to fill any gaps with
+         * the error transitions. */
+        if ( destRange.length() == 0 ) {
+            /* Range is currently empty. */
+            if ( keyOps->minKey < lowKey ) {
+                /* The first range doesn't start at the low end. */
+                Key fillHighKey = lowKey;
+                fillHighKey.decrement();
+
+                /* Create the filler with the state's error transition. */
+                RedTransEl newTel( keyOps->minKey, fillHighKey, redFsm->getErrorTrans() );
+                destRange.append( newTel );
+            }
+        }
+        else {
+            /* The range list is not empty, get the last range. */
+            RedTransEl *last = &destRange[destRange.length()-1];
+            Key nextKey = last->highKey;
+            nextKey.increment();
+            if ( nextKey < lowKey ) {
+                /* There is a gap to fill. Make the high key. */
+                Key fillHighKey = lowKey;
+                fillHighKey.decrement();
+
+                /* Create the filler with the state's error transition. */
+                RedTransEl newTel( nextKey, fillHighKey, redFsm->getErrorTrans() );
+                destRange.append( newTel );
+            }
+        }
+    }
+
+    /* Filler taken care of. Append the element built above rather than
+     * constructing an identical second one. */
+    destRange.append( transEl );
+}
+
+/* Close off the range list of state `snum`. For a complete machine the part
+ * of the alphabet above the last range (or the whole alphabet, if the state
+ * has no ranges) is mapped to the error transition. The error state itself
+ * and incomplete machines are left untouched. */
+void CodeGenData::finishTransList( int snum )
+{
+    RedStateAp *curState = allStates + snum;
+
+    /* Only non-error states of complete machines get filler on the end. */
+    if ( curState == redFsm->errState || !wantComplete )
+        return;
+
+    RedTransList &destRange = curState->outRange;
+
+    if ( destRange.length() == 0 ) {
+        /* No ranges at all: one filler spans the whole alphabet. */
+        destRange.append( RedTransEl( keyOps->minKey, keyOps->maxKey,
+                redFsm->getErrorTrans() ) );
+        return;
+    }
+
+    /* Look at the last range and see whether it reaches the top. */
+    RedTransEl *last = &destRange[destRange.length()-1];
+    if ( !( last->highKey < keyOps->maxKey ) )
+        return;
+
+    /* The filler starts one past the last covered key. */
+    Key fillLowKey = last->highKey;
+    fillLowKey.increment();
+    destRange.append( RedTransEl( fillLowKey, keyOps->maxKey,
+            redFsm->getErrorTrans() ) );
+}
+
+/* Assign `id` to the state at offset `snum` in allStates. */
+void CodeGenData::setId( int snum, int id )
+{
+ RedStateAp *curState = allStates + snum;
+ curState->id = id;
+}
+
+/* Mark the state at offset `snum` in allStates as a final state. */
+void CodeGenData::setFinal( int snum )
+{
+ RedStateAp *curState = allStates + snum;
+ curState->isFinal = true;
+}
+
+
+/* Attach to-state, from-state and EOF action tables to state `snum`. Each
+ * argument is an offset into allActionTables; a negative offset leaves the
+ * corresponding pointer of the state unchanged. */
+void CodeGenData::setStateActions( int snum, long toStateAction,
+        long fromStateAction, long eofAction )
+{
+    RedStateAp &state = allStates[snum];
+
+    if ( toStateAction >= 0 )
+        state.toStateAction = allActionTables + toStateAction;
+
+    if ( fromStateAction >= 0 )
+        state.fromStateAction = allActionTables + fromStateAction;
+
+    if ( eofAction >= 0 )
+        state.eofAction = allActionTables + eofAction;
+}
+
+/* Walk an inline code list, recursing into children, and replace the target
+ * state offset of every Goto, Call, Next and Entry item with a pointer into
+ * allStates. */
+void CodeGenData::resolveTargetStates( InlineList *inlineList )
+{
+    for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
+        /* Only these four item kinds carry a state offset in targId. */
+        const bool carriesTarget =
+                item->type == InlineItem::Goto ||
+                item->type == InlineItem::Call ||
+                item->type == InlineItem::Next ||
+                item->type == InlineItem::Entry;
+
+        if ( carriesTarget )
+            item->targState = allStates + item->targId;
+
+        if ( item->children != 0 )
+            resolveTargetStates( item->children );
+    }
+}
+
+/* Final fix-ups once the whole machine has been loaded: resolve the target
+ * states referenced from action code, then copy each state's condition list
+ * into its condition vector. */
+void CodeGenData::closeMachine()
+{
+    for ( ActionList::Iter action = actionList; action.lte(); action++ )
+        resolveTargetStates( action->inlineList );
+
+    /* Even for a complete graph the error state is not given a default
+     * transition: all machines break out of the processing loop when in the
+     * error state. */
+
+    for ( RedStateList::Iter state = redFsm->stateList; state.lte(); state++ ) {
+        for ( StateCondList::Iter cond = state->stateCondList; cond.lte(); cond++ )
+            state->stateCondVect.append( cond );
+    }
+}
+
+
+/* Select the alphabet type. `data` is parsed with atoi() as an index into
+ * the host language's type table and the chosen entry is handed to
+ * thisKeyOps. Always returns true. */
+bool CodeGenData::setAlphType( char *data )
+{
+    /* FIXME: This should validate the alphabet type selection. The index is
+     * used as parsed, with no range check. */
+    const int hostTypeIndex = atoi(data);
+    thisKeyOps.setAlphType( hostLang->hostTypes + hostTypeIndex );
+    return true;
+}
+
+/* Allocate the array holding `length` condition spaces and put every
+ * element, in index order, on condSpaceList. */
+void CodeGenData::initCondSpaceList( ulong length )
+{
+    allCondSpaces = new CondSpace[length];
+
+    ulong idx = 0;
+    while ( idx < length ) {
+        condSpaceList.append( allCondSpaces + idx );
+        idx += 1;
+    }
+}
+
+/* Fill in slot `cnum` of allCondSpaces with its id and base key. */
+void CodeGenData::newCondSpace( int cnum, int condSpaceId, Key baseKey )
+{
+    CondSpace &space = allCondSpaces[cnum];
+
+    space.condSpaceId = condSpaceId;
+    space.baseKey = baseKey;
+}
+
+/* Add the action at offset `condActionId` in allActions to the condition
+ * set of condition space `cnum`. */
+void CodeGenData::condSpaceItem( int cnum, long condActionId )
+{
+    allCondSpaces[cnum].condSet.append( allActions + condActionId );
+}
+
+/* Hook invoked with the number of state conditions of state `snum`.
+ * Currently a no-op: both arguments are ignored. */
+void CodeGenData::initStateCondList( int snum, ulong length )
+{
+ /* Could preallocate these, as we could with transitions. */
+}
+
+/* Create a state condition covering [lowKey, highKey], bind it to the
+ * condition space at offset `condNum` in allCondSpaces and append it to the
+ * condition list of state `snum`. The StateCond is heap allocated here. */
+void CodeGenData::addStateCond( int snum, Key lowKey, Key highKey, long condNum )
+{
+    StateCond *newCond = new StateCond;
+
+    newCond->lowKey = lowKey;
+    newCond->highKey = highKey;
+    newCond->condSpace = allCondSpaces + condNum;
+
+    allStates[snum].stateCondList.append( newCond );
+}
+
+
+/* Return the first condition space on condSpaceList whose key range, taken
+ * as baseKey .. baseKey + alphSize * 2^(number of conditions) with both
+ * ends compared inclusively, contains [lowKey, highKey]. Returns null when
+ * no space matches. */
+CondSpace *CodeGenData::findCondSpace( Key lowKey, Key highKey )
+{
+    for ( CondSpaceList::Iter space = condSpaceList; space.lte(); space++ ) {
+        Key spaceHighKey = space->baseKey;
+        spaceHighKey += keyOps->alphSize() * (1 << space->condSet.length());
+
+        const bool contained =
+                lowKey >= space->baseKey && highKey <= spaceHighKey;
+        if ( contained )
+            return space;
+    }
+
+    /* Nothing covers the requested range. */
+    return 0;
+}
+
+/* Return the first condition on conditionList for which
+ * baseKey <= key <= baseKey + 2^(number of conditions), or null when no
+ * condition matches. */
+Condition *CodeGenData::findCondition( Key key )
+{
+    for ( ConditionList::Iter cand = conditionList; cand.lte(); cand++ ) {
+        Key upperKey = cand->baseKey + (1 << cand->condSet.length());
+
+        const bool inRange = cand->baseKey <= key && key <= upperKey;
+        if ( inRange )
+            return cand;
+    }
+
+    /* The key falls in none of the known conditions. */
+    return 0;
+}
+
+/* Return the largest key in use: the alphabet maximum, or the high key of
+ * the last range of some state if that is larger. Asserts that no state has
+ * single transitions or a default transition yet. */
+Key CodeGenData::findMaxKey()
+{
+    Key largest = keyOps->maxKey;
+
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        assert( st->outSingle.length() == 0 );
+        assert( st->defTrans == 0 );
+
+        /* States without ranges cannot raise the maximum. */
+        long rangeLen = st->outRange.length();
+        if ( rangeLen <= 0 )
+            continue;
+
+        /* Only the last range of a state is examined. */
+        Key lastHigh = st->outRange[rangeLen-1].highKey;
+        if ( lastHigh > largest )
+            largest = lastHigh;
+    }
+
+    return largest;
+}
+
+/* Count how often every action table and every action inside it is used.
+ * Transitions (single, range and default) bump numTransRefs; state level
+ * tables bump numToStateRefs, numFromStateRefs and numEofRefs. Counters are
+ * only incremented, never reset, by this function. */
+void CodeGenData::findFinalActionRefs()
+{
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* Reference count out of single transitions. */
+        for ( RedTransList::Iter rtel = st->outSingle; rtel.lte(); rtel++ ) {
+            if ( rtel->value->action == 0 )
+                continue;
+
+            rtel->value->action->numTransRefs += 1;
+            for ( ActionTable::Iter item = rtel->value->action->key; item.lte(); item++ )
+                item->value->numTransRefs += 1;
+        }
+
+        /* Reference count out of range transitions. */
+        for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) {
+            if ( rtel->value->action == 0 )
+                continue;
+
+            rtel->value->action->numTransRefs += 1;
+            for ( ActionTable::Iter item = rtel->value->action->key; item.lte(); item++ )
+                item->value->numTransRefs += 1;
+        }
+
+        /* Reference count default transition. */
+        const bool defHasAction =
+                st->defTrans != 0 && st->defTrans->action != 0;
+        if ( defHasAction ) {
+            st->defTrans->action->numTransRefs += 1;
+            for ( ActionTable::Iter item = st->defTrans->action->key; item.lte(); item++ )
+                item->value->numTransRefs += 1;
+        }
+
+        /* Reference count to state actions. */
+        if ( st->toStateAction != 0 ) {
+            st->toStateAction->numToStateRefs += 1;
+            for ( ActionTable::Iter item = st->toStateAction->key; item.lte(); item++ )
+                item->value->numToStateRefs += 1;
+        }
+
+        /* Reference count from state actions. */
+        if ( st->fromStateAction != 0 ) {
+            st->fromStateAction->numFromStateRefs += 1;
+            for ( ActionTable::Iter item = st->fromStateAction->key; item.lte(); item++ )
+                item->value->numFromStateRefs += 1;
+        }
+
+        /* Reference count EOF actions. */
+        if ( st->eofAction != 0 ) {
+            st->eofAction->numEofRefs += 1;
+            for ( ActionTable::Iter item = st->eofAction->key; item.lte(); item++ )
+                item->value->numEofRefs += 1;
+        }
+    }
+}
+
+/* Scan the inline items of `inlineList`, recursing into children, and set
+ * machine-wide flags on redFsm describing what the action `act` does. The
+ * first group of flags applies to any referenced action, the second group
+ * only to actions referenced from transitions, to-state or from-state
+ * action tables. */
+void CodeGenData::analyzeAction( Action *act, InlineList *inlineList )
+{
+    for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
+        /* Only consider actions that are referenced. */
+        if ( act->numRefs() > 0 ) {
+            switch ( item->type ) {
+            case InlineItem::Goto: case InlineItem::GotoExpr:
+                redFsm->bAnyActionGotos = true;
+                break;
+            case InlineItem::Call: case InlineItem::CallExpr:
+                redFsm->bAnyActionCalls = true;
+                break;
+            case InlineItem::Ret:
+                redFsm->bAnyActionRets = true;
+                break;
+            default:
+                break;
+            }
+        }
+
+        /* Check for various things in regular actions. */
+        if ( act->numTransRefs > 0 || act->numToStateRefs > 0 || act->numFromStateRefs > 0 ) {
+            switch ( item->type ) {
+            case InlineItem::Ret:
+                /* Returns in regular actions. */
+                redFsm->bAnyRegActionRets = true;
+                break;
+            case InlineItem::Next: case InlineItem::NextExpr:
+                /* Next statements in regular actions. */
+                redFsm->bAnyRegNextStmt = true;
+                break;
+            case InlineItem::CallExpr: case InlineItem::GotoExpr:
+                /* By value control in regular actions. */
+                redFsm->bAnyRegActionByValControl = true;
+                break;
+            case InlineItem::Curs:
+                /* References to the current state in regular actions. */
+                redFsm->bAnyRegCurStateRef = true;
+                break;
+            case InlineItem::Break:
+                redFsm->bAnyRegBreak = true;
+                break;
+            case InlineItem::LmSwitch:
+                if ( item->handlesError )
+                    redFsm->bAnyLmSwitchError = true;
+                break;
+            default:
+                break;
+            }
+        }
+
+        if ( item->children != 0 )
+            analyzeAction( act, item->children );
+    }
+}
+
+/* Scan the inline items of `inlineList`, recursing into children, and note
+ * on the action table `redAct` whether it contains next statements,
+ * references to the current state or break statements. */
+void CodeGenData::analyzeActionList( RedAction *redAct, InlineList *inlineList )
+{
+    for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
+        switch ( item->type ) {
+        case InlineItem::Next: case InlineItem::NextExpr:
+            /* A next statement in the action table. */
+            redAct->bAnyNextStmt = true;
+            break;
+        case InlineItem::Curs:
+            /* A reference to the current state. */
+            redAct->bAnyCurStateRef = true;
+            break;
+        case InlineItem::Break:
+            redAct->bAnyBreakStmt = true;
+            break;
+        default:
+            break;
+        }
+
+        if ( item->children != 0 )
+            analyzeActionList( redAct, item->children );
+    }
+}
+
+/* Number the referenced actions consecutively from zero, in actionList
+ * order. Actions without references keep whatever id they had. */
+void CodeGenData::assignActionIds()
+{
+    int nextActionId = 0;
+    for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
+        /* Only ever interested in referenced actions. */
+        if ( !( act->numRefs() > 0 ) )
+            continue;
+
+        act->actionId = nextActionId;
+        nextActionId += 1;
+    }
+}
+
+/* Compute the max* members of redFsm from the loaded machine. All of them
+ * are reset first, then derived from the transition set, the condition
+ * spaces, the per-state lists and the action table map. analyzeMachine()
+ * describes these values as being used for deciding types. */
+void CodeGenData::setValueLimits()
+{
+ redFsm->maxSingleLen = 0;
+ redFsm->maxRangeLen = 0;
+ redFsm->maxKeyOffset = 0;
+ redFsm->maxIndexOffset = 0;
+ redFsm->maxActListId = 0;
+ redFsm->maxActionLoc = 0;
+ redFsm->maxActArrItem = 0;
+ redFsm->maxSpan = 0;
+ redFsm->maxCondSpan = 0;
+ redFsm->maxFlatIndexOffset = 0;
+ redFsm->maxCondOffset = 0;
+ redFsm->maxCondLen = 0;
+ redFsm->maxCondSpaceId = 0;
+ redFsm->maxCondIndexOffset = 0;
+
+ /* In both of these cases the 0 index is reserved for no value, so the max
+ * is one more than it would be if they started at 0. */
+ redFsm->maxIndex = redFsm->transSet.length();
+ redFsm->maxCond = condSpaceList.length();
+
+ /* The nextStateId - 1 is the last state id assigned. */
+ redFsm->maxState = redFsm->nextStateId - 1;
+
+ for ( CondSpaceList::Iter csi = condSpaceList; csi.lte(); csi++ ) {
+ if ( csi->condSpaceId > redFsm->maxCondSpaceId )
+ redFsm->maxCondSpaceId = csi->condSpaceId;
+ }
+
+ /* Per-state maxima, plus running offset totals that sum over every state
+ * except the last one. */
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ /* Maximum cond length. */
+ if ( st->stateCondList.length() > redFsm->maxCondLen )
+ redFsm->maxCondLen = st->stateCondList.length();
+
+ /* Maximum single length. */
+ if ( st->outSingle.length() > redFsm->maxSingleLen )
+ redFsm->maxSingleLen = st->outSingle.length();
+
+ /* Maximum range length. */
+ if ( st->outRange.length() > redFsm->maxRangeLen )
+ redFsm->maxRangeLen = st->outRange.length();
+
+ /* The key offset index offset for the state after last is not used, skip it.. */
+ if ( ! st.last() ) {
+ redFsm->maxCondOffset += st->stateCondList.length();
+ /* Each range contributes two keys (low and high), each single one. */
+ redFsm->maxKeyOffset += st->outSingle.length() + st->outRange.length()*2;
+ redFsm->maxIndexOffset += st->outSingle.length() + st->outRange.length() + 1;
+ }
+
+ /* Max cond span. */
+ if ( st->condList != 0 ) {
+ unsigned long long span = keyOps->span( st->condLowKey, st->condHighKey );
+ if ( span > redFsm->maxCondSpan )
+ redFsm->maxCondSpan = span;
+ }
+
+ /* Max key span. */
+ if ( st->transList != 0 ) {
+ unsigned long long span = keyOps->span( st->lowKey, st->highKey );
+ if ( span > redFsm->maxSpan )
+ redFsm->maxSpan = span;
+ }
+
+ /* Max cond index offset. */
+ if ( ! st.last() ) {
+ if ( st->condList != 0 )
+ redFsm->maxCondIndexOffset += keyOps->span( st->condLowKey, st->condHighKey );
+ }
+
+ /* Max flat index offset. */
+ if ( ! st.last() ) {
+ if ( st->transList != 0 )
+ redFsm->maxFlatIndexOffset += keyOps->span( st->lowKey, st->highKey );
+ /* One extra slot per state, whether or not it has a transList. */
+ redFsm->maxFlatIndexOffset += 1;
+ }
+ }
+
+ for ( ActionTableMap::Iter at = redFsm->actionMap; at.lte(); at++ ) {
+ /* Maximum id of action lists. */
+ if ( at->actListId+1 > redFsm->maxActListId )
+ redFsm->maxActListId = at->actListId+1;
+
+ /* Maximum location of items in action array. */
+ if ( at->location+1 > redFsm->maxActionLoc )
+ redFsm->maxActionLoc = at->location+1;
+
+ /* Maximum values going into the action array. */
+ /* Both the table length and every action id are folded into the same
+ * maximum. */
+ if ( at->key.length() > redFsm->maxActArrItem )
+ redFsm->maxActArrItem = at->key.length();
+ for ( ActionTable::Iter item = at->key; item.lte(); item++ ) {
+ if ( item->value->actionId > redFsm->maxActArrItem )
+ redFsm->maxActArrItem = item->value->actionId;
+ }
+ }
+}
+
+
+
+/* Gather information about the loaded machine. In order: count action
+ * references, set the machine-wide action flags, analyze the reduced action
+ * tables, flag states whose transitions reference the current state, assign
+ * action ids and compute the value limits. */
+void CodeGenData::analyzeMachine()
+{
+    /* Find the true count of action references. */
+    findFinalActionRefs();
+
+    /* Record which kinds of action are present, then inspect the code of
+     * each action. */
+    for ( ActionList::Iter action = actionList; action.lte(); action++ ) {
+        if ( action->numToStateRefs > 0 )
+            redFsm->bAnyToStateActions = true;
+        if ( action->numFromStateRefs > 0 )
+            redFsm->bAnyFromStateActions = true;
+        if ( action->numEofRefs > 0 )
+            redFsm->bAnyEofActions = true;
+        if ( action->numTransRefs > 0 )
+            redFsm->bAnyRegActions = true;
+
+        /* Recurse through the action's parse tree looking for various things. */
+        analyzeAction( action, action->inlineList );
+    }
+
+    /* Analyze reduced action lists. */
+    for ( ActionTableMap::Iter table = redFsm->actionMap; table.lte(); table++ ) {
+        for ( ActionTable::Iter entry = table->key; entry.lte(); entry++ )
+            analyzeActionList( table, entry->value->inlineList );
+    }
+
+    /* Flag states that have a transition whose action table references the
+     * current state, and note whether any state carries conditions. */
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* Check any actions out of outSingle. */
+        for ( RedTransList::Iter single = st->outSingle; single.lte(); single++ ) {
+            if ( single->value->action == 0 )
+                continue;
+            if ( single->value->action->anyCurStateRef() )
+                st->bAnyRegCurStateRef = true;
+        }
+
+        /* Check any actions out of outRange. */
+        for ( RedTransList::Iter range = st->outRange; range.lte(); range++ ) {
+            if ( range->value->action == 0 )
+                continue;
+            if ( range->value->action->anyCurStateRef() )
+                st->bAnyRegCurStateRef = true;
+        }
+
+        /* Check any action out of default. */
+        if ( st->defTrans != 0 && st->defTrans->action != 0 ) {
+            if ( st->defTrans->action->anyCurStateRef() )
+                st->bAnyRegCurStateRef = true;
+        }
+
+        if ( st->stateCondList.length() > 0 )
+            redFsm->bAnyConditions = true;
+    }
+
+    /* Assign ids to actions that are referenced. */
+    assignActionIds();
+
+    /* Set the maximums of various values used for deciding types. */
+    setValueLimits();
+}
+
+/* Process one write statement.
+ *
+ *  loc    source location used for diagnostics.
+ *  nargs  number of entries in args.
+ *  args   args[0] is the command ("data", "init", "exec", "eof" or
+ *         "exports"); the remaining entries are its options.
+ *
+ * Recognized options are "noerror", "noprefix" and "nofinal" for "data" and
+ * "noend" for "exec"; they clear the matching write option member. Any
+ * other option produces a warning and is ignored. An unknown or missing
+ * command is reported through source_error(). For a known command the
+ * corresponding virtual write function is invoked after a newline and a
+ * line directive have been emitted. */
+void CodeGenData::writeStatement( InputLoc &loc, int nargs, char **args )
+{
+    /* A write statement has to name a command. Without this check args[0]
+     * below would be read from an empty argument list. */
+    if ( nargs < 1 ) {
+        source_error(loc) << "missing write command" << endl;
+        return;
+    }
+
+    /* FIXME: This should be moved to the virtual functions in the code
+     * generators.
+     *
+     * Force a newline. */
+    out << "\n";
+    genLineDirective( out );
+
+    if ( strcmp( args[0], "data" ) == 0 ) {
+        for ( int i = 1; i < nargs; i++ ) {
+            if ( strcmp( args[i], "noerror" ) == 0 )
+                writeErr = false;
+            else if ( strcmp( args[i], "noprefix" ) == 0 )
+                dataPrefix = false;
+            else if ( strcmp( args[i], "nofinal" ) == 0 )
+                writeFirstFinal = false;
+            else {
+                source_warning(loc) << "unrecognized write option \"" <<
+                        args[i] << "\"" << endl;
+            }
+        }
+        writeData();
+    }
+    else if ( strcmp( args[0], "init" ) == 0 ) {
+        /* "init" takes no options. */
+        for ( int i = 1; i < nargs; i++ ) {
+            source_warning(loc) << "unrecognized write option \"" <<
+                    args[i] << "\"" << endl;
+        }
+        writeInit();
+    }
+    else if ( strcmp( args[0], "exec" ) == 0 ) {
+        for ( int i = 1; i < nargs; i++ ) {
+            if ( strcmp( args[i], "noend" ) == 0 )
+                hasEnd = false;
+            else {
+                source_warning(loc) << "unrecognized write option \"" <<
+                        args[i] << "\"" << endl;
+            }
+        }
+        writeExec();
+    }
+    else if ( strcmp( args[0], "eof" ) == 0 ) {
+        /* "eof" takes no options. */
+        for ( int i = 1; i < nargs; i++ ) {
+            source_warning(loc) << "unrecognized write option \"" <<
+                    args[i] << "\"" << endl;
+        }
+        writeEOF();
+    }
+    else if ( strcmp( args[0], "exports" ) == 0 ) {
+        /* "exports" takes no options. */
+        for ( int i = 1; i < nargs; i++ ) {
+            source_warning(loc) << "unrecognized write option \"" <<
+                    args[i] << "\"" << endl;
+        }
+        writeExports();
+    }
+    else {
+        /* Not a command we know about. */
+        source_error(loc) << "unrecognized write command \"" <<
+                args[0] << "\"" << endl;
+    }
+}
+
+/* Start a warning message on cerr, prefixed with
+ * "<sourceFileName>:<line>:<col>: warning: ", and return cerr so the caller
+ * can append the message text. Unlike source_error() this does not touch
+ * codeGenErrCount. */
+ostream &CodeGenData::source_warning( const InputLoc &loc )
+{
+    /* Same precondition as source_error(): streaming a null char pointer is
+     * undefined behaviour, so insist on a file name. */
+    assert( sourceFileName != 0 );
+    cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": warning: ";
+    return cerr;
+}
+
+/* Count one more code generation error and start an error message on cerr,
+ * prefixed with "<sourceFileName>:<line>:<col>: ". Returns cerr so the
+ * caller can append the message text. Asserts that sourceFileName is set. */
+ostream &CodeGenData::source_error( const InputLoc &loc )
+{
+    ++codeGenErrCount;
+    assert( sourceFileName != 0 );
+
+    cerr << sourceFileName << ":";
+    cerr << loc.line << ":" << loc.col << ": ";
+    return cerr;
+}
+
+
diff --git a/contrib/tools/ragel5/redfsm/gendata.h b/contrib/tools/ragel5/redfsm/gendata.h
new file mode 100644
index 0000000000..855e0710a7
--- /dev/null
+++ b/contrib/tools/ragel5/redfsm/gendata.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2005-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _GENDATA_H
+#define _GENDATA_H
+
+#include <iostream>
+#include "redfsm.h"
+#include "common.h"
+
+using std::ostream;
+
+struct NameInst;
+typedef DList<Action> ActionList;
+
+typedef unsigned long ulong;
+
+struct FsmCodeGen;
+struct CodeGenData;
+
+typedef AvlMap<char *, CodeGenData*, CmpStr> CodeGenMap;
+typedef AvlMapEl<char *, CodeGenData*> CodeGenMapEl;
+
+/*
+ * The interface to the parser
+ */
+
+/* These functions must be implemented by the code generation executable.
+ * The openOutput function is invoked when the root element is opened. The
+ * makeCodeGen function is invoked when a ragel_def element is opened. */
+std::ostream *openOutput( char *inputFile );
+CodeGenData *makeCodeGen( char *sourceFileName,
+ char *fsmName, ostream &out, bool wantComplete );
+
+/* Line directive helpers, declared here and defined outside this header.
+ * CodeGenData::writeStatement() calls genLineDirective() on its output
+ * stream before dispatching a write command. */
+void lineDirective( ostream &out, char *fileName, int line );
+void genLineDirective( ostream &out );
+
+/*********************************/
+
+/* Holds one machine as it is loaded for code generation, together with the
+ * hooks a concrete code generator overrides. The loading functions fill the
+ * all* arrays and redFsm; the analysis functions derive flags and limits
+ * from them; the virtual write* functions emit the generated code. */
+struct CodeGenData
+{
+ /*
+ * The interface to the code generator.
+ */
+ virtual void finishRagelDef() {}
+
+ /* These are invoked by the corresponding write statements. */
+ virtual void writeData() {};
+ virtual void writeInit() {};
+ virtual void writeExec() {};
+ virtual void writeEOF() {};
+ virtual void writeExports() {};
+
+ /* This can also be overridden to modify the processing of write
+ * statements. */
+ virtual void writeStatement( InputLoc &loc, int nargs, char **args );
+
+ /********************/
+
+ CodeGenData( ostream &out );
+ virtual ~CodeGenData() {}
+
+ /*
+ * Collecting the machine.
+ */
+
+ char *sourceFileName;
+ char *fsmName;
+ ostream &out;
+ RedFsmAp *redFsm;
+ /* Arrays allocated by the init*List() functions; other members refer to
+ * their elements by offset. allConditions and nameIndex are only
+ * initialized to null in gendata.cpp. */
+ Action *allActions;
+ RedAction *allActionTables;
+ Condition *allConditions;
+ CondSpace *allCondSpaces;
+ RedStateAp *allStates;
+ NameInst **nameIndex;
+ /* Offsets into allStates, -1 until set; resolved by initStateList(). */
+ int startState;
+ int errState;
+ ActionList actionList;
+ ConditionList conditionList;
+ CondSpaceList condSpaceList;
+ InlineList *getKeyExpr;
+ InlineList *accessExpr;
+ InlineList *curStateExpr;
+ KeyOps thisKeyOps;
+ /* When set, gaps in a state's key ranges are filled with the error
+ * transition (see newTrans() and finishTransList()). */
+ bool wantComplete;
+ /* Parallel vectors filled by addEntryPoint(). */
+ EntryIdVect entryPointIds;
+ EntryNameVect entryPointNames;
+ bool hasLongestMatch;
+ /* Incremented by source_error(). */
+ int codeGenErrCount;
+ ExportList exportList;
+
+ /* Write options. */
+ /* All default to true; cleared by the "noend", "noprefix", "nofinal" and
+ * "noerror" write options respectively. */
+ bool hasEnd;
+ bool dataPrefix;
+ bool writeFirstFinal;
+ bool writeErr;
+
+ void createMachine();
+ void initActionList( unsigned long length );
+ void newAction( int anum, char *name, int line, int col, InlineList *inlineList );
+ void initActionTableList( unsigned long length );
+ void initStateList( unsigned long length );
+ void setStartState( unsigned long startState );
+ void setErrorState( unsigned long errState );
+ void addEntryPoint( char *name, unsigned long entryState );
+ void setId( int snum, int id );
+ void setFinal( int snum );
+ void initTransList( int snum, unsigned long length );
+ void newTrans( int snum, int tnum, Key lowKey, Key highKey,
+ long targ, long act );
+ void finishTransList( int snum );
+ void setStateActions( int snum, long toStateAction,
+ long fromStateAction, long eofAction );
+ void setForcedErrorState()
+ { redFsm->forcedErrorState = true; }
+
+
+ void initCondSpaceList( ulong length );
+ void condSpaceItem( int cnum, long condActionId );
+ void newCondSpace( int cnum, int condSpaceId, Key baseKey );
+
+ void initStateCondList( int snum, ulong length );
+ void addStateCond( int snum, Key lowKey, Key highKey, long condNum );
+
+ CondSpace *findCondSpace( Key lowKey, Key highKey );
+ Condition *findCondition( Key key );
+
+ bool setAlphType( char *data );
+
+ void resolveTargetStates( InlineList *inlineList );
+ Key findMaxKey();
+
+ /* Gather various info on the machine. */
+ void analyzeActionList( RedAction *redAct, InlineList *inlineList );
+ void analyzeAction( Action *act, InlineList *inlineList );
+ void findFinalActionRefs();
+ void analyzeMachine();
+
+ void closeMachine();
+ void setValueLimits();
+ void assignActionIds();
+
+ /* Both write a "file:line:col: " prefix to cerr and return cerr. */
+ ostream &source_warning( const InputLoc &loc );
+ ostream &source_error( const InputLoc &loc );
+};
+
+
+#endif /* _GENDATA_H */
diff --git a/contrib/tools/ragel5/redfsm/phash.h b/contrib/tools/ragel5/redfsm/phash.h
new file mode 100644
index 0000000000..11ce7502a6
--- /dev/null
+++ b/contrib/tools/ragel5/redfsm/phash.h
@@ -0,0 +1,10 @@
+#pragma once
+
+/* Declares a string lookup keyed on (str, len) that yields an
+ * XMLTagHashPair pointer. Only declarations live here; the definitions are
+ * elsewhere.
+ * NOTE(review): the names match what gperf generates — confirm where the
+ * implementation comes from. */
+class Perfect_Hash
+{
+private:
+ /* Hash of the first `len` characters of `str`; internal to the lookup. */
+ static inline unsigned int hash (const char *str, unsigned int len);
+
+public:
+ /* Look up `str` of length `len`. Presumably returns null when the word is
+ * not in the set — TODO confirm against the definition. */
+ static struct XMLTagHashPair *in_word_set (const char *str, unsigned int len);
+};
diff --git a/contrib/tools/ragel5/redfsm/redfsm.cpp b/contrib/tools/ragel5/redfsm/redfsm.cpp
new file mode 100644
index 0000000000..6a55b22ec7
--- /dev/null
+++ b/contrib/tools/ragel5/redfsm/redfsm.cpp
@@ -0,0 +1,559 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "redfsm.h"
+#include "avlmap.h"
+#include <iostream>
+#include <sstream>
+
+using std::ostringstream;
+
+KeyOps *keyOps = 0;
+
+/* Produce a printable identifier for the action: its name when one was
+ * given, otherwise the "line:col" of its source location. */
+string Action::nameOrLoc()
+{
+    if ( name == 0 ) {
+        /* Anonymous action, fall back to where it was defined. */
+        ostringstream locText;
+        locText << loc.line << ":" << loc.col;
+        return locText.str();
+    }
+
+    return string(name);
+}
+
+/* Construct an empty machine: flags cleared, id counters at zero and the
+ * state/transition pointers null. Initializers follow the member declaration
+ * order. The max* limit members are not set here. */
+RedFsmAp::RedFsmAp()
+:
+    wantComplete(false),
+    forcedErrorState(false),
+    nextActionId(0),
+    nextTransId(0),
+    /* Doubles as the state count; start from a defined value. */
+    nextStateId(0),
+    startState(0),
+    errState(0),
+    errTrans(0),
+    /* Null until something assigns it, instead of an indeterminate pointer. */
+    errActionTrans(0),
+    firstFinState(0),
+    numFinStates(0),
+    /* Zero until partitionFsm() is called. */
+    nParts(0),
+    bAnyToStateActions(false),
+    bAnyFromStateActions(false),
+    bAnyRegActions(false),
+    bAnyEofActions(false),
+    bAnyActionGotos(false),
+    bAnyActionCalls(false),
+    bAnyActionRets(false),
+    bAnyRegActionRets(false),
+    bAnyRegActionByValControl(false),
+    bAnyRegNextStmt(false),
+    bAnyRegCurStateRef(false),
+    bAnyRegBreak(false),
+    bAnyLmSwitchError(false),
+    bAnyConditions(false)
+{
+}
+
+/* True when the action map holds at least one entry. */
+bool RedFsmAp::anyActions()
+{
+    const bool haveActions = actionMap.length() > 0;
+    return haveActions;
+}
+
+/* Recursive step of the depth-first ordering: append the state to the state
+ * list (once) and then visit the target of each of its range transitions. */
+void RedFsmAp::depthFirstOrdering( RedStateAp *state )
+{
+    /* Only states not yet on the list need to be visited. */
+    if ( ! state->onStateList ) {
+        /* Pre-order: the state goes on the list before its targets. */
+        state->onStateList = true;
+        stateList.append( state );
+
+        /* Singles and the default must not have been split out yet. */
+        assert( state->outSingle.length() == 0 );
+        assert( state->defTrans == 0 );
+
+        /* Follow every range transition that has a target. */
+        for ( RedTransList::Iter range = state->outRange; range.lte(); range++ ) {
+            RedStateAp *target = range->value->targ;
+            if ( target != 0 )
+                depthFirstOrdering( target );
+        }
+    }
+}
+
+/* Rebuild the state list in depth-first order, starting from the start state,
+ * then every entry point and, when forced, the error state. */
+void RedFsmAp::depthFirstOrdering()
+{
+    /* Remember how many states there are so the rebuild can be verified. */
+    int numStatesBefore = stateList.length();
+
+    /* Mark every state as not yet placed. */
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ )
+        st->onStateList = false;
+
+    /* Empty the list; the states are re-appended by the traversal below. */
+    stateList.abandon();
+
+    /* Roots of the traversal, in order. */
+    if ( startState != 0 )
+        depthFirstOrdering( startState );
+
+    for ( RedStateSet::Iter entry = entryPoints; entry.lte(); entry++ )
+        depthFirstOrdering( *entry );
+
+    if ( forcedErrorState )
+        depthFirstOrdering( errState );
+
+    /* Every state must have been reached from one of the roots. */
+    assert( numStatesBefore == stateList.length() );
+}
+
+/* Number the states 0, 1, 2, ... following their order in the state list. */
+void RedFsmAp::sequentialStateIds()
+{
+    /* Ids must start at zero for the table based machines. */
+    nextStateId = 0;
+
+    RedStateList::Iter st = stateList;
+    while ( st.lte() ) {
+        st->id = nextStateId;
+        nextStateId += 1;
+        st++;
+    }
+}
+
+/* Stable sort the states by final state status. Each final state met during
+ * one forward walk is detached and re-appended to the list, so final states
+ * end up after the non-final ones in the order they were encountered. */
+void RedFsmAp::sortStatesByFinal()
+{
+ /* Move forward through the list and throw final states onto the end. */
+ RedStateAp *state = 0;
+ RedStateAp *next = stateList.head;
+ /* The tail is captured before anything moves: the walk stops once this
+ * element has been processed, so re-appended states are not revisited. */
+ RedStateAp *last = stateList.tail;
+ while ( state != last ) {
+ /* Move forward and load up the next. */
+ state = next;
+ /* The successor is read before the state is possibly relocated. */
+ next = state->next;
+
+ /* Throw to the end? */
+ if ( state->isFinal ) {
+ stateList.detach( state );
+ stateList.append( state );
+ }
+ }
+}
+
+/* Assign state ids by final state status: non-final states receive the low
+ * ids, final states the ids that follow. List order is left untouched. */
+void RedFsmAp::sortStateIdsByFinal()
+{
+    /* Table based machines depend on this starting at zero. */
+    nextStateId = 0;
+
+    /* Pass 0 numbers the non-final states, pass 1 the final ones. */
+    for ( int pass = 0; pass < 2; pass++ ) {
+        const bool wantFinal = ( pass == 1 );
+        for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+            if ( st->isFinal == wantFinal )
+                st->id = nextStateId++;
+        }
+    }
+}
+
+/* Placeholder: declared as "sorting states by id" but currently does
+ * nothing. Calling it leaves the state list unchanged. */
+void RedFsmAp::sortByStateId()
+{
+ /* FIXME: Implement. */
+}
+
+/* Record in firstFinState the final state carrying the smallest id. A value
+ * already stored in firstFinState takes part in the comparison. */
+void RedFsmAp::findFirstFinState()
+{
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+        /* Non-final states are never candidates. */
+        if ( ! st->isFinal )
+            continue;
+
+        if ( firstFinState == 0 || st->id < firstFinState->id )
+            firstFinState = st;
+    }
+}
+
+/* Give each entry of the action map a location: a running offset that grows
+ * by the entry's key length plus one for every entry passed. */
+void RedFsmAp::assignActionLocs()
+{
+    int offset = 0;
+    for ( ActionTableMap::Iter redAct = actionMap; redAct.lte(); redAct++ ) {
+        redAct->location = offset;
+
+        /* Skip over the array and a null terminator. */
+        offset += redAct->key.length() + 1;
+    }
+}
+
+/* Check if we can extend the current range by displacing any ranges
+ * ahead to the singles. Returns true when a later entry shares the transition
+ * of list[pos], is reachable through key-contiguous entries, and every entry
+ * in between spans exactly one key. Returns false otherwise. */
+bool RedFsmAp::canExtend( const RedTransList &list, int pos )
+{
+ /* Get the transition that we want to extend. */
+ RedTransAp *extendTrans = list[pos].value;
+
+ /* Look ahead in the transition list. Note that pos (a by-value parameter)
+ * advances together with next, so list[pos] is always the entry just before
+ * list[next]. */
+ for ( int next = pos + 1; next < list.length(); pos++, next++ ) {
+ /* If they are not continuous then cannot extend. */
+ Key nextKey = list[next].lowKey;
+ nextKey.decrement();
+ if ( list[pos].highKey != nextKey )
+ break;
+
+ /* Check for the extension property. */
+ if ( extendTrans == list[next].value )
+ return true;
+
+ /* If the span of the next element is more than one, then don't keep
+ * checking, it won't be moved to single. */
+ unsigned long long nextSpan = keyOps->span( list[next].lowKey, list[next].highKey );
+ if ( nextSpan > 1 )
+ break;
+ }
+ return false;
+}
+
+/* Move ranges to the singles list. Walks state->outRange in place: entries
+ * that sit between two ranges with the same transition are moved to outSingle
+ * and the two ranges are merged; a remaining range whose span is one key is
+ * moved to outSingle as well; everything else stays in outRange. */
+void RedFsmAp::moveTransToSingle( RedStateAp *state )
+{
+ RedTransList &range = state->outRange;
+ RedTransList &single = state->outSingle;
+ /* rpos only advances when the current entry is kept; after a merge or a
+ * removal the same index is examined again. */
+ for ( int rpos = 0; rpos < range.length(); ) {
+ /* Check if this is a range we can extend. */
+ if ( canExtend( range, rpos ) ) {
+ /* Transfer singles over. canExtend() returning true means a later
+ * entry with the same transition exists, which ends this loop. */
+ while ( range[rpos].value != range[rpos+1].value ) {
+ /* Transfer the range to single. */
+ single.append( range[rpos+1] );
+ range.remove( rpos+1 );
+ }
+
+ /* Extend. The neighbour now has the same transition: absorb its upper
+ * bound and drop it. */
+ range[rpos].highKey = range[rpos+1].highKey;
+ range.remove( rpos+1 );
+ }
+ /* Maybe move it to the singles. */
+ else if ( keyOps->span( range[rpos].lowKey, range[rpos].highKey ) == 1 ) {
+ single.append( range[rpos] );
+ range.remove( rpos );
+ }
+ else {
+ /* Keeping it in the ranges. */
+ rpos += 1;
+ }
+ }
+}
+
+/* For every state, pull the suitable single character transitions out of
+ * its range list. */
+void RedFsmAp::chooseSingle()
+{
+    RedStateList::Iter st = stateList;
+    while ( st.lte() ) {
+        /* Rewrites the state's transition lists in place. */
+        moveTransToSingle( st );
+        st++;
+    }
+}
+
+/* Build the flat lookup arrays of every state: condList is indexed over the
+ * state's condition key span and transList over its transition key span.
+ * Slots of transList not covered by a range receive the state's default
+ * transition. */
+void RedFsmAp::makeFlat()
+{
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+        /* Flat condition array. */
+        if ( st->stateCondList.length() != 0 ) {
+            st->condLowKey = st->stateCondList.head->lowKey;
+            st->condHighKey = st->stateCondList.tail->highKey;
+
+            unsigned long long condSpan = keyOps->span( st->condLowKey, st->condHighKey );
+            st->condList = new CondSpace*[ condSpan ];
+            memset( st->condList, 0, sizeof(CondSpace*)*condSpan );
+
+            for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) {
+                /* Slot of the entry's low key, then how many slots it fills. */
+                unsigned long long first = keyOps->span( st->condLowKey, sc->lowKey ) - 1;
+                unsigned long long count = keyOps->span( sc->lowKey, sc->highKey );
+                for ( unsigned long long i = 0; i < count; i++ )
+                    st->condList[first+i] = sc->condSpace;
+            }
+        }
+        else {
+            /* No conditions on this state. */
+            st->condLowKey = 0;
+            st->condHighKey = 0;
+        }
+
+        /* Flat transition array. */
+        if ( st->outRange.length() != 0 ) {
+            st->lowKey = st->outRange[0].lowKey;
+            st->highKey = st->outRange[st->outRange.length()-1].highKey;
+            unsigned long long keySpan = keyOps->span( st->lowKey, st->highKey );
+            st->transList = new RedTransAp*[ keySpan ];
+            memset( st->transList, 0, sizeof(RedTransAp*)*keySpan );
+
+            for ( RedTransList::Iter range = st->outRange; range.lte(); range++ ) {
+                /* Slot of the range's low key, then how many slots it fills. */
+                unsigned long long first = keyOps->span( st->lowKey, range->lowKey ) - 1;
+                unsigned long long count = keyOps->span( range->lowKey, range->highKey );
+                for ( unsigned long long i = 0; i < count; i++ )
+                    st->transList[first+i] = range->value;
+            }
+
+            /* Whatever is still empty takes the default transition. */
+            for ( unsigned long long slot = 0; slot < keySpan; slot++ ) {
+                if ( st->transList[slot] == 0 )
+                    st->transList[slot] = st->defTrans;
+            }
+        }
+        else {
+            /* No ranges on this state. */
+            st->lowKey = st->highKey = 0;
+            st->transList = 0;
+        }
+    }
+}
+
+
+/* Install defTrans as the state's default transition and drop from the
+ * state's outRange every range that uses it. */
+void RedFsmAp::moveToDefault( RedTransAp *defTrans, RedStateAp *state )
+{
+    /* Collect the ranges that do not take the chosen default. */
+    RedTransList kept;
+    for ( RedTransList::Iter range = state->outRange; range.lte(); range++ ) {
+        if ( range->value == defTrans )
+            continue;
+        kept.append( *range );
+    }
+
+    /* The filtered list replaces the state's range list. */
+    state->outRange.transfer( kept );
+
+    /* Remember the default. */
+    state->defTrans = defTrans;
+}
+
+/* Is every key of the alphabet covered by outRange? True only when the list
+ * is non-empty, starts at or below keyOps->minKey, has no gap between
+ * neighbouring ranges and reaches keyOps->maxKey. */
+bool RedFsmAp::alphabetCovered( RedTransList &outRange )
+{
+    /* An empty list covers nothing. */
+    if ( outRange.length() == 0 )
+        return false;
+
+    /* The first range has to begin at the lower bound. */
+    RedTransList::Iter range = outRange;
+    if ( keyOps->minKey < range->lowKey )
+        return false;
+
+    /* Each following range has to start right after its predecessor ends. */
+    range.increment();
+    while ( range.lte() ) {
+        Key expectedLow = range[-1].highKey;
+        expectedLow.increment();
+        if ( expectedLow != range->lowKey )
+            return false;
+        range++;
+    }
+
+    /* The last range has to extend to the upper bound. */
+    RedTransEl *tail = &outRange[outRange.length()-1];
+    return ! ( tail->highKey < keyOps->maxKey );
+}
+
+/* Return the transition of the state's outRange that covers the largest
+ * total key span, or null when outRange is empty. */
+RedTransAp *RedFsmAp::chooseDefaultSpan( RedStateAp *state )
+{
+    /* The distinct transitions used by the ranges. */
+    RedTransSet distinct;
+    for ( RedTransList::Iter range = state->outRange; range.lte(); range++ )
+        distinct.insert( range->value );
+
+    /* One accumulated span per distinct transition, indexed by its position
+     * in the set. */
+    unsigned long long *total = new unsigned long long[distinct.length()];
+    memset( total, 0, sizeof(unsigned long long) * distinct.length() );
+    for ( RedTransList::Iter range = state->outRange; range.lte(); range++ ) {
+        RedTransAp **slot = distinct.find( range->value );
+        int index = slot - distinct.data;
+        total[index] += keyOps->span( range->lowKey, range->highKey );
+    }
+
+    /* The transition with the biggest total wins; the first one wins ties. */
+    RedTransAp *best = 0;
+    unsigned long long bestSpan = 0;
+    for ( RedTransSet::Iter cand = distinct; cand.lte(); cand++ ) {
+        unsigned long long candSpan = total[cand.pos()];
+        if ( candSpan > bestSpan ) {
+            bestSpan = candSpan;
+            best = *cand;
+        }
+    }
+
+    delete[] total;
+    return best;
+}
+
+/* Pick a default transition, by largest span, for every state whose ranges
+ * cover the whole alphabet. */
+void RedFsmAp::chooseDefaultSpan()
+{
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+        /* States with uncovered keys get no default. This avoids any
+         * transitions in the out range that go to error and avoids the need
+         * for an ERR state. */
+        if ( ! alphabetCovered( st->outRange ) )
+            continue;
+
+        /* Choose by span, then move the choice out of the range list. */
+        RedTransAp *picked = chooseDefaultSpan( st );
+        moveToDefault( picked, st );
+    }
+}
+
+/* Return the first transition in the state's outRange whose target is the
+ * state that follows it in the state list (state->next), or null when no
+ * range goes there. */
+RedTransAp *RedFsmAp::chooseDefaultGoto( RedStateAp *state )
+{
+    /* The unused local transition set of the old version is gone; nothing
+     * here needs one. */
+    for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) {
+        if ( rtel->value->targ == state->next )
+            return rtel->value;
+    }
+    return 0;
+}
+
+/* Pick a default transition for every state: preferably one leading to the
+ * next state in the list, otherwise the one with the largest span. */
+void RedFsmAp::chooseDefaultGoto()
+{
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+        /* Span based choice is only computed when the goto choice fails. */
+        RedTransAp *toNext = chooseDefaultGoto( st );
+        RedTransAp *picked = ( toNext != 0 ) ? toNext : chooseDefaultSpan( st );
+
+        /* Take the choice out of the range list and store it as default. */
+        moveToDefault( picked, st );
+    }
+}
+
+/* Return the transition used by the greatest number of ranges in the state's
+ * outRange, or null when outRange is empty. */
+RedTransAp *RedFsmAp::chooseDefaultNumRanges( RedStateAp *state )
+{
+    /* The distinct transitions used by the ranges. */
+    RedTransSet distinct;
+    for ( RedTransList::Iter range = state->outRange; range.lte(); range++ )
+        distinct.insert( range->value );
+
+    /* One use counter per distinct transition, indexed by its position in
+     * the set. */
+    int *useCount = new int[distinct.length()];
+    memset( useCount, 0, sizeof(int) * distinct.length() );
+    for ( RedTransList::Iter range = state->outRange; range.lte(); range++ ) {
+        RedTransAp **slot = distinct.find( range->value );
+        useCount[slot - distinct.data] += 1;
+    }
+
+    /* The most used transition wins; the first one wins ties. */
+    RedTransAp *best = 0;
+    int bestCount = 0;
+    for ( RedTransSet::Iter cand = distinct; cand.lte(); cand++ ) {
+        int candCount = useCount[cand.pos()];
+        if ( candCount > bestCount ) {
+            bestCount = candCount;
+            best = *cand;
+        }
+    }
+
+    delete[] useCount;
+    return best;
+}
+
+/* Pick, for every state, the transition used by most ranges as its default. */
+void RedFsmAp::chooseDefaultNumRanges()
+{
+    RedStateList::Iter st = stateList;
+    while ( st.lte() ) {
+        /* Choose, then take the choice out of the range list and store it
+         * as the default. */
+        RedTransAp *picked = chooseDefaultNumRanges( st );
+        moveToDefault( picked, st );
+        st++;
+    }
+}
+
+RedTransAp *RedFsmAp::getErrorTrans( )
+{
+ /* If the error trans has not been made aready, make it. */
+ if ( errTrans == 0 ) {
+ /* This insert should always succeed since no transition created by
+ * the user can point to the error state. */
+ errTrans = new RedTransAp( getErrorState(), 0, nextTransId++ );
+ RedTransAp *inRes = transSet.insert( errTrans );
+ assert( inRes != 0 );
+ }
+ return errTrans;
+}
+
+RedStateAp *RedFsmAp::getErrorState()
+{
+ /* Something went wrong. An error state is needed but one was not supplied
+ * by the frontend. */
+ assert( errState != 0 );
+ return errState;
+}
+
+
+/* Return the reduced transition for (targ, action) from the transition set,
+ * creating it with a fresh id and inserting it when it is not there yet. */
+RedTransAp *RedFsmAp::allocateTrans( RedStateAp *targ, RedAction *action )
+{
+    /* A temporary with a dummy id serves as the lookup key. */
+    RedTransAp probe( targ, action, 0 );
+    RedTransAp *found = transSet.find( &probe );
+    if ( found != 0 )
+        return found;
+
+    /* Not known yet: allocate for real, consuming a transition id. */
+    RedTransAp *created = new RedTransAp( targ, action, nextTransId++ );
+    transSet.insert( created );
+    return created;
+}
+
+/* Split the state list, in its current order, into nparts consecutive
+ * partitions and store each state's partition number. The first
+ * (length % nparts) partitions receive one state more than the others.
+ * nparts must be positive. */
+void RedFsmAp::partitionFsm( int nparts )
+{
+    /* Dividing by a non-positive partition count is meaningless; fail loudly
+     * rather than hit a division by zero. */
+    assert( nparts > 0 );
+
+    /* At this point the states are ordered by a depth-first traversal. We
+     * will allocate to partitions based on this ordering. */
+    this->nParts = nparts;
+    int partSize = stateList.length() / nparts;
+    int remainder = stateList.length() % nparts;
+    int numInPart = partSize;
+    int partition = 0;
+
+    /* While a remainder is left, a partition takes one extra state. */
+    if ( remainder-- > 0 )
+        numInPart += 1;
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+        st->partition = partition;
+
+        /* Partition full: move on and size the next one. */
+        numInPart -= 1;
+        if ( numInPart == 0 ) {
+            partition += 1;
+            numInPart = partSize;
+            if ( remainder-- > 0 )
+                numInPart += 1;
+        }
+    }
+}
+
+/* Fill in, for every state on the state list, the array of transitions that
+ * target it (inTrans) and its length (numInTrans). Transitions without a
+ * target are skipped; depthFirstOrdering() likewise treats a null targ as
+ * possible. */
+void RedFsmAp::setInTrans()
+{
+    /* First pass counts the number of transitions. */
+    for ( TransApSet::Iter trans = transSet; trans.lte(); trans++ ) {
+        if ( trans->targ != 0 )
+            trans->targ->numInTrans += 1;
+    }
+
+    /* Pass over states to allocate the needed memory. Reset the counts so we
+     * can use them as the current size. */
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+        st->inTrans = new RedTransAp*[st->numInTrans];
+        st->numInTrans = 0;
+    }
+
+    /* Second pass over transitions copies pointers into the in trans list. */
+    for ( TransApSet::Iter trans = transSet; trans.lte(); trans++ ) {
+        RedStateAp *target = trans->targ;
+        if ( target != 0 )
+            target->inTrans[target->numInTrans++] = trans;
+    }
+}
diff --git a/contrib/tools/ragel5/redfsm/redfsm.h b/contrib/tools/ragel5/redfsm/redfsm.h
new file mode 100644
index 0000000000..515b1b621b
--- /dev/null
+++ b/contrib/tools/ragel5/redfsm/redfsm.h
@@ -0,0 +1,534 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _REDFSM_H
+#define _REDFSM_H
+
+#include <assert.h>
+#include <string.h>
+#include <string>
+#include "common.h"
+#include "vector.h"
+#include "dlist.h"
+#include "compare.h"
+#include "bstmap.h"
+#include "bstset.h"
+#include "avlmap.h"
+#include "avltree.h"
+#include "avlbasic.h"
+#include "mergesort.h"
+#include "sbstmap.h"
+#include "sbstset.h"
+#include "sbsttable.h"
+
+#define TRANS_ERR_TRANS 0
+#define STATE_ERR_STATE 0
+#define FUNC_NO_FUNC 0
+
+using std::string;
+
+struct RedStateAp;
+struct InlineList;
+struct Action;
+
+/* Location in an input file: a line number and a column number. Plain data
+ * with no constructor, so a default-constructed InputLoc has indeterminate
+ * values unless it is value-initialized. */
+struct InputLoc
+{
+ int line;
+ int col;
+};
+
+/*
+ * Inline code tree
+ *
+ * One node of an inline code list. The Type tag says what kind of item it is;
+ * the remaining fields are optional payload and start out zero/false.
+ */
+struct InlineItem
+{
+ /* The kinds of inline item. */
+ enum Type
+ {
+ Text, Goto, Call, Next, GotoExpr, CallExpr, NextExpr, Ret,
+ PChar, Char, Hold, Exec, HoldTE, ExecTE, Curs, Targs, Entry,
+ LmSwitch, LmSetActId, LmSetTokEnd, LmGetTokEnd, LmInitTokStart,
+ LmInitAct, LmSetTokStart, SubAction, Break
+ };
+
+ /* Create an item of the given type at loc; all payload fields are cleared.
+ * prev and next are left for the owning list to set. */
+ InlineItem( const InputLoc &loc, Type type ) :
+ loc(loc), data(0), targId(0), targState(0),
+ lmId(0), children(0), offset(0),
+ handlesError(false), type(type) { }
+
+ InputLoc loc;
+ char *data;
+ int targId;
+ RedStateAp *targState;
+ int lmId;
+ /* Nested items, if any. */
+ InlineList *children;
+ int offset;
+ bool handlesError;
+ Type type;
+
+ /* List links. */
+ InlineItem *prev, *next;
+};
+
+/* Normally this would be a typedef, but that would entail including DList from
+ * ptreetypes, which should be just typedef forwards. */
+struct InlineList : public DList<InlineItem> { };
+
+/* Element in list of actions. Contains the string for the code to execute. */
+struct Action
+:
+    public DListEl<Action>
+{
+    /* Start with no name, no code, id zero and all reference counts zero.
+     * loc is value-initialized so that nameOrLoc() never prints indeterminate
+     * numbers for an unnamed action whose location was not filled in. */
+    Action( )
+    :
+        loc(),
+        name(0),
+        inlineList(0),
+        actionId(0),
+        numTransRefs(0),
+        numToStateRefs(0),
+        numFromStateRefs(0),
+        numEofRefs(0)
+    {
+    }
+
+    /* Data collected during parse. */
+    InputLoc loc;
+    char *name;
+    InlineList *inlineList;
+    int actionId;
+
+    /* The action's name, or "line:col" of loc when it has none. */
+    string nameOrLoc();
+
+    /* Number of references in the final machine. */
+    int numRefs()
+        { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; }
+    int numTransRefs;
+    int numToStateRefs;
+    int numFromStateRefs;
+    int numEofRefs;
+};
+
+
+/* Forwards. */
+struct RedStateAp;
+struct StateAp;
+
+/* Transition Action Element. */
+typedef SBstMapEl< int, Action* > ActionTableEl;
+
+/* Transition Action Table: a map from an int ordering to an Action pointer,
+ * compared with CmpOrd<int>. The setters are only declared here. */
+struct ActionTable
+ : public SBstMap< int, Action*, CmpOrd<int> >
+{
+ /* Set one action under the given ordering. */
+ void setAction( int ordering, Action *action );
+ /* Set nActs actions from parallel arrays of orderings and actions. */
+ void setActions( int *orderings, Action **actions, int nActs );
+ /* Set the actions found in another table. */
+ void setActions( const ActionTable &other );
+};
+
+/* Three-way comparison of whole action table elements: ordered by key first
+ * and, for equal keys, by value (the action pointer). */
+struct CmpActionTableEl
+{
+    static int compare( const ActionTableEl &action1,
+            const ActionTableEl &action2 )
+    {
+        /* Keys decide unless they are equal. */
+        if ( action1.key != action2.key )
+            return action1.key < action2.key ? -1 : 1;
+
+        /* Same key, fall back to the value. */
+        if ( action1.value != action2.value )
+            return action1.value < action2.value ? -1 : 1;
+
+        return 0;
+    }
+};
+
+/* Compare for ActionTable. */
+typedef CmpSTable< ActionTableEl, CmpActionTableEl > CmpActionTable;
+
+/* Set of states. */
+typedef BstSet<RedStateAp*> RedStateSet;
+typedef BstSet<int> IntSet;
+
+/* Reduced action: an action table (the key) shared by every place in the
+ * machine that uses exactly that table, together with bookkeeping about how
+ * it is referenced. */
+struct RedAction
+:
+    public AvlTreeEl<RedAction>
+{
+    /* Start with an empty key, no references and all flags off. actListId
+     * and location are zeroed too, so they hold a defined value before an
+     * assignment pass fills them in. */
+    RedAction( )
+    :
+        key(),
+        actListId(0),
+        location(0),
+        eofRefs(0),
+        numTransRefs(0),
+        numToStateRefs(0),
+        numFromStateRefs(0),
+        numEofRefs(0),
+        bAnyNextStmt(false),
+        bAnyCurStateRef(false),
+        bAnyBreakStmt(false)
+    { }
+
+    /* Key accessor used by the AVL tree. */
+    const ActionTable &getKey()
+        { return key; }
+
+    ActionTable key;
+    int actListId;
+    int location;
+    IntSet *eofRefs;
+
+    /* Number of references in the final machine. */
+    int numRefs()
+        { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; }
+    int numTransRefs;
+    int numToStateRefs;
+    int numFromStateRefs;
+    int numEofRefs;
+
+    /* Accessors for the flags below. */
+    bool anyNextStmt() { return bAnyNextStmt; }
+    bool anyCurStateRef() { return bAnyCurStateRef; }
+    bool anyBreakStmt() { return bAnyBreakStmt; }
+
+    bool bAnyNextStmt;
+    bool bAnyCurStateRef;
+    bool bAnyBreakStmt;
+};
+typedef AvlTree<RedAction, ActionTable, CmpActionTable> ActionTableMap;
+typedef AvlTree<RedAction, ActionTable, CmpActionTable> ActionTableMap;
+
+/* Reduced transition: a (target state, action) pair with an id. */
+struct RedTransAp
+:
+    public AvlTreeEl<RedTransAp>
+{
+    /* partitionBoundary starts out false instead of indeterminate;
+     * labelNeeded starts out true as before. */
+    RedTransAp( RedStateAp *targ, RedAction *action, int id )
+        : targ(targ), action(action), id(id),
+        partitionBoundary(false), labelNeeded(true) { }
+
+    RedStateAp *targ;
+    RedAction *action;
+    int id;
+    bool partitionBoundary;
+    bool labelNeeded;
+};
+
+/* Three-way comparison of transitions for the final reduction of
+ * transitions. Ordering is by target pointer, then by the pointer to the
+ * shared action table. It is assumed that the action tables have already
+ * been reduced when this is used. */
+struct CmpRedTransAp
+{
+    static int compare( const RedTransAp &t1, const RedTransAp &t2 )
+    {
+        /* Targets decide unless they are the same. */
+        if ( t1.targ != t2.targ )
+            return t1.targ < t2.targ ? -1 : 1;
+
+        /* Same target, fall back to the action. */
+        if ( t1.action != t2.action )
+            return t1.action < t2.action ? -1 : 1;
+
+        return 0;
+    }
+};
+
+typedef AvlBasic<RedTransAp, CmpRedTransAp> TransApSet;
+
+/* Element in out range: the key interval lowKey..highKey and the transition
+ * taken for it. Also the element type of the singles list. */
+struct RedTransEl
+{
+ /* Constructors. */
+ RedTransEl( Key lowKey, Key highKey, RedTransAp *value )
+ : lowKey(lowKey), highKey(highKey), value(value) { }
+
+ Key lowKey, highKey;
+ RedTransAp *value;
+};
+
+typedef Vector<RedTransEl> RedTransList;
+typedef Vector<RedStateAp*> RedStateVect;
+
+typedef BstMapEl<RedStateAp*, unsigned long long> RedSpanMapEl;
+typedef BstMap<RedStateAp*, unsigned long long> RedSpanMap;
+
+/* Compare used by span map sort. Orders by span in reverse: the element with
+ * the larger span compares as smaller. */
+struct CmpRedSpanMapEl
+{
+    static int compare( const RedSpanMapEl &smel1, const RedSpanMapEl &smel2 )
+    {
+        if ( smel1.value == smel2.value )
+            return 0;
+
+        /* Bigger span sorts first. */
+        return smel1.value > smel2.value ? -1 : 1;
+    }
+};
+
+/* Sorting state-span map entries by span. */
+typedef MergeSort<RedSpanMapEl, CmpRedSpanMapEl> RedSpanMapSort;
+
+/* Set of entry ids that go into this state. */
+typedef Vector<int> EntryIdVect;
+typedef Vector<char*> EntryNameVect;
+
+typedef Vector< Action* > CondSet;
+
+/* A condition record: two keys and a set of actions, linked into a
+ * ConditionList. Both keys start at zero. */
+struct Condition
+{
+ Condition( )
+ : key(0), baseKey(0) {}
+
+ Key key;
+ Key baseKey;
+ CondSet condSet;
+
+ /* List links. */
+ Condition *next, *prev;
+};
+typedef DList<Condition> ConditionList;
+
+/* A condition space: a base key, a set of actions and an id, linked into a
+ * CondSpaceList. There is no constructor, so the members are not initialized
+ * by default construction. */
+struct CondSpace
+{
+ Key baseKey;
+ CondSet condSet;
+ int condSpaceId;
+
+ /* List links. */
+ CondSpace *next, *prev;
+};
+typedef DList<CondSpace> CondSpaceList;
+
+/* A condition attached to a state: the key interval lowKey..highKey and the
+ * condition space that applies to it. Linked into a StateCondList. */
+struct StateCond
+{
+ Key lowKey;
+ Key highKey;
+
+ CondSpace *condSpace;
+
+ /* List links. */
+ StateCond *prev, *next;
+};
+typedef DList<StateCond> StateCondList;
+typedef Vector<StateCond*> StateCondVect;
+
+/* Reduced state: outgoing transitions (singles, ranges and a default), the
+ * optional flat lookup arrays, state-level actions and ordering/partition
+ * bookkeeping. */
+struct RedStateAp
+{
+    /* Everything starts out null/false/zero. partition is zeroed as well, so
+     * it holds a defined value before partitionFsm() assigns it. */
+    RedStateAp()
+    :
+        defTrans(0),
+        condList(0),
+        transList(0),
+        isFinal(false),
+        labelNeeded(false),
+        outNeeded(false),
+        onStateList(false),
+        toStateAction(0),
+        fromStateAction(0),
+        eofAction(0),
+        id(0),
+        bAnyRegCurStateRef(false),
+        partition(0),
+        partitionBoundary(false),
+        inTrans(0),
+        numInTrans(0)
+    { }
+
+    /* Transitions out. */
+    RedTransList outSingle;
+    RedTransList outRange;
+    RedTransAp *defTrans;
+
+    /* For flat conditions. */
+    Key condLowKey, condHighKey;
+    CondSpace **condList;
+
+    /* For flat keys. */
+    Key lowKey, highKey;
+    RedTransAp **transList;
+
+    /* The list of states that transitions from this state go to. */
+    RedStateVect targStates;
+
+    bool isFinal;
+    bool labelNeeded;
+    bool outNeeded;
+    /* Visited marker used while the state list is rebuilt. */
+    bool onStateList;
+    RedAction *toStateAction;
+    RedAction *fromStateAction;
+    RedAction *eofAction;
+    int id;
+    StateCondList stateCondList;
+    StateCondVect stateCondVect;
+
+    /* Pointers for the list of states. */
+    RedStateAp *prev, *next;
+
+    bool anyRegCurStateRef() { return bAnyRegCurStateRef; }
+    bool bAnyRegCurStateRef;
+
+    int partition;
+    bool partitionBoundary;
+
+    /* Transitions that target this state and how many there are. */
+    RedTransAp **inTrans;
+    int numInTrans;
+};
+
+/* List of states. */
+typedef DList<RedStateAp> RedStateList;
+
+/* Set of reduced transitions. Comparison is by pointer. */
+typedef BstSet< RedTransAp*, CmpOrd<RedTransAp*> > RedTransSet;
+
+/* Next version of the fsm machine. Owns the reduced states, transitions and
+ * action tables, and offers the passes that reshape them (default/single
+ * selection, flattening, ordering, id assignment, partitioning). */
+struct RedFsmAp
+{
+ RedFsmAp();
+
+ bool wantComplete;
+ bool forcedErrorState;
+
+ int nextActionId;
+ int nextTransId;
+
+ /* Next State Id doubles as the total number of state ids. */
+ int nextStateId;
+
+ TransApSet transSet;
+ ActionTableMap actionMap;
+ RedStateList stateList;
+ RedStateSet entryPoints;
+ RedStateAp *startState;
+ RedStateAp *errState;
+ RedTransAp *errTrans;
+ RedTransAp *errActionTrans;
+ RedStateAp *firstFinState;
+ int numFinStates;
+ int nParts;
+
+ /* Analysis flags, exposed through the any*() accessors below. */
+ bool bAnyToStateActions;
+ bool bAnyFromStateActions;
+ bool bAnyRegActions;
+ bool bAnyEofActions;
+ bool bAnyActionGotos;
+ bool bAnyActionCalls;
+ bool bAnyActionRets;
+ bool bAnyRegActionRets;
+ bool bAnyRegActionByValControl;
+ bool bAnyRegNextStmt;
+ bool bAnyRegCurStateRef;
+ bool bAnyRegBreak;
+ bool bAnyLmSwitchError;
+ bool bAnyConditions;
+
+ /* Value limits. These are not set by the constructor. */
+ int maxState;
+ int maxSingleLen;
+ int maxRangeLen;
+ int maxKeyOffset;
+ int maxIndexOffset;
+ int maxIndex;
+ int maxActListId;
+ int maxActionLoc;
+ int maxActArrItem;
+ unsigned long long maxSpan;
+ unsigned long long maxCondSpan;
+ int maxFlatIndexOffset;
+ Key maxKey;
+ int maxCondOffset;
+ int maxCondLen;
+ int maxCondSpaceId;
+ int maxCondIndexOffset;
+ int maxCond;
+
+ bool anyActions();
+ bool anyToStateActions() { return bAnyToStateActions; }
+ bool anyFromStateActions() { return bAnyFromStateActions; }
+ bool anyRegActions() { return bAnyRegActions; }
+ bool anyEofActions() { return bAnyEofActions; }
+ bool anyActionGotos() { return bAnyActionGotos; }
+ bool anyActionCalls() { return bAnyActionCalls; }
+ bool anyActionRets() { return bAnyActionRets; }
+ bool anyRegActionRets() { return bAnyRegActionRets; }
+ bool anyRegActionByValControl() { return bAnyRegActionByValControl; }
+ bool anyRegNextStmt() { return bAnyRegNextStmt; }
+ bool anyRegCurStateRef() { return bAnyRegCurStateRef; }
+ bool anyRegBreak() { return bAnyRegBreak; }
+ bool anyLmSwitchError() { return bAnyLmSwitchError; }
+ bool anyConditions() { return bAnyConditions; }
+
+
+ /* Is it possible to extend a range by bumping ranges that span only
+ * one character to the singles array. */
+ bool canExtend( const RedTransList &list, int pos );
+
+ /* Pick single transitions from the ranges. */
+ void moveTransToSingle( RedStateAp *state );
+ void chooseSingle();
+
+ /* Build the flat condition and transition arrays of every state. */
+ void makeFlat();
+
+ /* Move a selected transition from ranges to default. */
+ void moveToDefault( RedTransAp *defTrans, RedStateAp *state );
+
+ /* Pick a default transition by largest span. */
+ RedTransAp *chooseDefaultSpan( RedStateAp *state );
+ void chooseDefaultSpan();
+
+ /* Pick a default transition by most number of ranges. */
+ RedTransAp *chooseDefaultNumRanges( RedStateAp *state );
+ void chooseDefaultNumRanges();
+
+ /* Pick a default transition tailored towards goto driven machine. */
+ RedTransAp *chooseDefaultGoto( RedStateAp *state );
+ void chooseDefaultGoto();
+
+ /* Ordering states by transition connections. */
+ void optimizeStateOrdering( RedStateAp *state );
+ void optimizeStateOrdering();
+
+ /* Ordering states by transition connections. */
+ void depthFirstOrdering( RedStateAp *state );
+ void depthFirstOrdering();
+
+ /* Set state ids. */
+ void sequentialStateIds();
+ void sortStateIdsByFinal();
+
+ /* Arrange states by final status. This is a stable sort. */
+ void sortStatesByFinal();
+
+ /* Sorting states by id. */
+ void sortByStateId();
+
+ /* Locating the first final state. This is the final state with the lowest
+ * id. */
+ void findFirstFinState();
+
+ /* Assign each action table its location offset. */
+ void assignActionLocs();
+
+ /* The shared error transition (created on demand) and the error state. */
+ RedTransAp *getErrorTrans();
+ RedStateAp *getErrorState();
+
+ /* Is every char in the alphabet covered? */
+ bool alphabetCovered( RedTransList &outRange );
+
+ /* Find or create the reduced transition for a target/action pair. */
+ RedTransAp *allocateTrans( RedStateAp *targState, RedAction *actionTable );
+
+ /* Assign the states to nParts consecutive partitions. */
+ void partitionFsm( int nParts );
+
+ /* Fill in each state's list of incoming transitions. */
+ void setInTrans();
+};
+
+
+#endif /* _REDFSM_H */
diff --git a/contrib/tools/ragel5/redfsm/xmlparse.cpp b/contrib/tools/ragel5/redfsm/xmlparse.cpp
new file mode 100644
index 0000000000..6da8c50e91
--- /dev/null
+++ b/contrib/tools/ragel5/redfsm/xmlparse.cpp
@@ -0,0 +1,3549 @@
+/* Automatically generated by Kelbt from "xmlparse.kl".
+ *
+ * Parts of this file are copied from Kelbt source covered by the GNU
+ * GPL. As a special exception, you may use the parts of this file copied
+ * from Kelbt source without restriction. The remainder is derived from
+ * "xmlparse.kl" and inherits the copyright status of that file.
+ */
+
+#line 1 "xmlparse.kl"
+/*
+ * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "xmlparse.h"
+#include "common.h"
+#include "gendata.h"
+#include <iostream>
+
+#include <stdlib.h>
+//#include <malloc.h>
+
+using std::cout;
+using std::ostream;
+using std::istream;
+using std::cerr;
+using std::endl;
+
+Key readKey( char *td, char **end ); /* these three helpers are only declared here; no definition is in this chunk */
+long readOffsetPtr( char *td, char **end );
+unsigned long readLength( char *td );
+
+#line 117 "xmlparse.kh"
+#line 120 "xmlparse.kh"
+#line 163 "xmlparse.kh"
+#line 846 "xmlparse.kl"
+
+
+#line 54 "xmlparse.cpp"
+struct Parser_Lel_inline_item_type /* payload type of Parser_UserData::inline_item_type */
+{
+#line 499 "xmlparse.kl"
+ /* Lines here are attributed to xmlparse.kl by the #line directive above. */
+ InlineItem *inlineItem;
+
+ /* The directive below switches line attribution back to the generated file. */
+#line 61 "xmlparse.cpp"
+};
+
+struct Parser_Lel_inline_list /* payload type of Parser_UserData::inline_list */
+{
+#line 480 "xmlparse.kl"
+ /* Single field, attributed to xmlparse.kl by the #line directive above. */
+ InlineList *inlineList;
+
+ /* Line attribution returns to the generated file after this point. */
+#line 71 "xmlparse.cpp"
+};
+
+struct Parser_Lel_lm_action_list /* payload type of Parser_UserData::lm_action_list */
+{
+#line 716 "xmlparse.kl"
+ /* Same field name and type as in Parser_Lel_inline_list, but a distinct struct. */
+ InlineList *inlineList;
+
+ /* Line attribution returns to the generated file after this point. */
+#line 81 "xmlparse.cpp"
+};
+
+struct Parser_Lel_tag_arg /* payload type of Parser_UserData::tag_arg */
+{
+#line 256 "xmlparse.kl"
+ /* NOTE(review): ownership of the pointed-to string is not visible in this chunk. */
+ char *option;
+
+ /* Line attribution returns to the generated file after this point. */
+#line 91 "xmlparse.cpp"
+};
+
+struct Parser_Lel_tag_write_head /* payload type of Parser_UserData::tag_write_head */
+{
+#line 220 "xmlparse.kl"
+ /* Held by value; InputLoc is declared outside this chunk. */
+ InputLoc loc;
+
+ /* Line attribution returns to the generated file after this point. */
+#line 101 "xmlparse.cpp"
+};
+
+union Parser_UserData /* overlay of all per-element payload structs; embedded as Parser_LangEl::user */
+{
+ struct Parser_Lel_inline_item_type inline_item_type; /* holds an InlineItem pointer */
+ struct Parser_Lel_inline_list inline_list; /* holds an InlineList pointer */
+ struct Parser_Lel_lm_action_list lm_action_list; /* holds an InlineList pointer */
+ struct Parser_Lel_tag_arg tag_arg; /* holds a char* option */
+ struct Parser_Lel_tag_write_head tag_write_head; /* holds an InputLoc by value */
+ struct Token token; /* Token is declared outside this chunk */
+};
+
+struct Parser_LangEl /* one parser element: bookkeeping ints, a Parser_UserData payload, and links to other elements */
+{
+ char *file; /* presumably the source file name for this element; TODO confirm against the parser driver */
+ int line; /* presumably the source line for this element; TODO confirm */
+ int type; /* presumably the element's kind id; TODO confirm */
+ int reduction; /* NOTE(review): meaning not visible in this chunk */
+ int state; /* NOTE(review): likely a parser state number; confirm against the driver */
+ union Parser_UserData user; /* payload; which member is live is decided by code outside this chunk */
+ unsigned int retry; /* NOTE(review): meaning not visible in this chunk */
+ struct Parser_LangEl *next, *child; /* links to other elements; their exact list/tree roles are not shown here */
+};
+
+#line 127 "xmlparse.cpp"
+unsigned int Parser_startState = 0; /* presumably the parser's initial state number; its readers are outside this chunk */
+
+short Parser_indicies[] = {
+ 142, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 140, 139, 0, 1, 283, 144, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 144, 144, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 144, -1, -1, -1, -1, -1,
+ -1, -1, -1, 2, 146, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 151,
+ 146, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 146, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 3, 143, -1, -1, -1,
+ 4, 5, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 6, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 169, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 145, 147, 148, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 7, 153, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 153, -1, -1, -1, -1,
+ -1, -1, 153, -1, 153, -1, -1, -1,
+ -1, -1, -1, -1, 153, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 153, 153, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 8,
+ 141, 9, 171, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 171, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 10, 11, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 174, -1, -1,
+ -1, -1, -1, -1, 12, -1, 13, -1,
+ -1, -1, -1, -1, -1, -1, 16, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 15, 14, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 152, 154, 155, 156, 157, 158,
+ 159, -1, -1, -1, -1, -1, -1, 17,
+ 149, 18, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 19, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 170, 150, 20, 217, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 217, -1, -1, -1, -1,
+ -1, -1, 217, 217, 217, 217, 217, 217,
+ 217, 217, 217, 217, 217, 217, 217, 217,
+ 217, 217, 217, -1, 217, 217, 217, 217,
+ 217, 217, 217, -1, -1, -1, 217, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 21, 217, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 217, -1, -1, -1, -1,
+ -1, -1, 217, 217, 217, 217, 217, 217,
+ 217, 217, 217, 217, 217, 217, 217, 217,
+ 217, 217, 217, -1, 217, 217, 217, 217,
+ 217, 217, 217, -1, -1, -1, 217, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 24, 217, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 217, -1, -1, -1, -1,
+ -1, -1, 217, 217, 217, 217, 217, 217,
+ 217, 217, 217, 217, 217, 217, 217, 217,
+ 217, 217, 217, -1, 217, 217, 217, 217,
+ 217, 217, 217, -1, -1, -1, 217, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 23, 162, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 162, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 22, 176, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 176, -1, -1, -1, -1, 176, 176,
+ 176, 176, -1, -1, -1, -1, -1, -1,
+ 176, -1, 176, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 25, 168, 26, 164, 27, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 52, -1, -1, -1, -1, -1, -1,
+ 28, 29, 30, 31, 32, 33, 34, 35,
+ 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, -1, 53, 47, 51, 50, 48, 46,
+ 49, -1, -1, -1, 36, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 216, -1, 218, 219,
+ 220, 221, 222, 223, 224, 225, 226, 227,
+ 228, 229, 230, 231, 232, 233, 234, 235,
+ 236, 237, 238, 239, 240, 241, 242, 243,
+ 54, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 55, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 161, 56,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 52, -1, -1, -1,
+ -1, -1, -1, 28, 29, 30, 31, 32,
+ 33, 34, 35, 37, 38, 39, 40, 41,
+ 42, 43, 44, 45, -1, 53, 47, 51,
+ 50, 48, 46, 49, -1, -1, -1, 36,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 216,
+ -1, 218, 219, 220, 221, 222, 223, 224,
+ 225, 226, 227, 228, 229, 230, 231, 232,
+ 233, 234, 235, 236, 237, 238, 239, 240,
+ 241, 242, 243, 57, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 52, -1, -1, -1, -1, -1, -1, 28,
+ 29, 30, 31, 32, 33, 34, 35, 37,
+ 38, 39, 40, 41, 42, 43, 44, 45,
+ -1, 53, 47, 51, 50, 48, 46, 49,
+ -1, -1, -1, 36, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 216, -1, 218, 219, 220,
+ 221, 222, 223, 224, 225, 226, 227, 228,
+ 229, 230, 231, 232, 233, 234, 235, 236,
+ 237, 238, 239, 240, 241, 242, 243, 58,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 191, -1, -1, -1,
+ -1, 59, 60, 212, 274, -1, -1, -1,
+ -1, -1, -1, 61, -1, 279, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 175, 177, 178, 179,
+ 180, 181, 182, 183, -1, -1, 62, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 63, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 64, -1, -1,
+ 65, 172, 165, 67, 68, 69, 70, 217,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 217, -1, -1, -1,
+ -1, -1, -1, 217, 217, 217, 217, 217,
+ 217, 217, 217, 217, 217, 217, 217, 217,
+ 217, 217, 217, 217, -1, 217, 217, 217,
+ 217, 217, 217, 217, -1, -1, -1, 217,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 71, 217,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 217, -1, -1, -1,
+ -1, -1, -1, 217, 217, 217, 217, 217,
+ 217, 217, 217, 217, 217, 217, 217, 217,
+ 217, 217, 217, 217, -1, 217, 217, 217,
+ 217, 217, 217, 217, -1, -1, -1, 217,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 72, 217,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 217, -1, -1, -1,
+ -1, -1, -1, 217, 217, 217, 217, 217,
+ 217, 217, 217, 217, 217, 217, 217, 217,
+ 217, 217, 217, 217, -1, 217, 217, 217,
+ 217, 217, 217, 217, -1, -1, -1, 217,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 73, 74,
+ 91, 75, 76, 77, 217, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 217, -1, -1, -1, -1, -1, -1,
+ 217, 217, 217, 217, 217, 217, 217, 217,
+ 217, 217, 217, 217, 217, 217, 217, 217,
+ 217, -1, 217, 217, 217, 217, 217, 217,
+ 217, -1, -1, -1, 217, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 78, 79, 217, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 217, -1, -1, -1, -1, -1,
+ -1, 217, 217, 217, 217, 217, 217, 217,
+ 217, 217, 217, 217, 217, 217, 217, 217,
+ 217, 217, -1, 217, 217, 217, 217, 217,
+ 217, 217, -1, -1, -1, 217, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 80, 81, 82, 83,
+ 89, 85, 88, 90, 87, 86, 217, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 217, -1, -1, -1, -1,
+ -1, -1, 217, 217, 217, 217, 217, 217,
+ 217, 217, 217, 217, 217, 217, 217, 217,
+ 217, 217, 217, -1, 217, 217, 217, 217,
+ 217, 217, 217, -1, -1, -1, 217, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 66, 271, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 271, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 84, 160, 92, 167, 166, 173,
+ 93, 94, 188, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 188, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 95,
+ 193, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 193, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 96, 214, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 214,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 97,
+ 276, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 276,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 98,
+ 100, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 99, 281, 101, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 52, -1, -1, -1, -1, -1,
+ -1, 28, 29, 30, 31, 32, 33, 34,
+ 35, 37, 38, 39, 40, 41, 42, 43,
+ 44, 45, -1, 53, 47, 51, 50, 48,
+ 46, 49, -1, -1, -1, 36, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 216, -1, 218,
+ 219, 220, 221, 222, 223, 224, 225, 226,
+ 227, 228, 229, 230, 231, 232, 233, 234,
+ 235, 236, 237, 238, 239, 240, 241, 242,
+ 243, 244, 245, 246, 247, 102, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 52, -1, -1, -1, -1, -1,
+ -1, 28, 29, 30, 31, 32, 33, 34,
+ 35, 37, 38, 39, 40, 41, 42, 43,
+ 44, 45, -1, 53, 47, 51, 50, 48,
+ 46, 49, -1, -1, -1, 36, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 216, -1, 218,
+ 219, 220, 221, 222, 223, 224, 225, 226,
+ 227, 228, 229, 230, 231, 232, 233, 234,
+ 235, 236, 237, 238, 239, 240, 241, 242,
+ 243, 103, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 52, -1,
+ -1, -1, -1, -1, -1, 28, 29, 30,
+ 31, 32, 33, 34, 35, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, -1, 53,
+ 47, 51, 50, 48, 46, 49, -1, -1,
+ -1, 36, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 216, -1, 218, 219, 220, 221, 222,
+ 223, 224, 225, 226, 227, 228, 229, 230,
+ 231, 232, 233, 234, 235, 236, 237, 238,
+ 239, 240, 241, 242, 243, 104, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 52, -1, -1, -1, -1, -1,
+ -1, 28, 29, 30, 31, 32, 33, 34,
+ 35, 37, 38, 39, 40, 41, 42, 43,
+ 44, 45, -1, 53, 47, 51, 50, 48,
+ 46, 49, -1, -1, -1, 36, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 216, -1, 218,
+ 219, 220, 221, 222, 223, 224, 225, 226,
+ 227, 228, 229, 230, 231, 232, 233, 234,
+ 235, 236, 237, 238, 239, 240, 241, 242,
+ 243, 251, 253, 254, 255, 105, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 52, -1, -1, -1, -1, -1,
+ -1, 28, 29, 30, 31, 32, 33, 34,
+ 35, 37, 38, 39, 40, 41, 42, 43,
+ 44, 45, -1, 53, 47, 51, 50, 48,
+ 46, 49, -1, -1, -1, 36, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 216, -1, 218,
+ 219, 220, 221, 222, 223, 224, 225, 226,
+ 227, 228, 229, 230, 231, 232, 233, 234,
+ 235, 236, 237, 238, 239, 240, 241, 242,
+ 243, 257, 106, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 52,
+ -1, -1, -1, -1, -1, -1, 28, 29,
+ 30, 31, 32, 33, 34, 35, 37, 38,
+ 39, 40, 41, 42, 43, 44, 45, -1,
+ 53, 47, 51, 50, 48, 46, 49, -1,
+ -1, -1, 36, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 216, -1, 218, 219, 220, 221,
+ 222, 223, 224, 225, 226, 227, 228, 229,
+ 230, 231, 232, 233, 234, 235, 236, 237,
+ 238, 239, 240, 241, 242, 243, 259, 260,
+ 261, 107, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 108, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 270, 263,
+ 267, 266, 264, 262, 265, 252, 163, 184,
+ 185, 109, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 110, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 187,
+ 111, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 112, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 192, 113, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 114, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 213, 115, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 116, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 275, 118, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 100, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 280, 117,
+ 268, 248, 249, 250, 256, 258, 269, 217,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 217, -1, -1, -1,
+ -1, -1, -1, 217, 217, 217, 217, 217,
+ 217, 217, 217, 217, 217, 217, 217, 217,
+ 217, 217, 217, 217, -1, 217, 217, 217,
+ 217, 217, 217, 217, -1, -1, -1, 217,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 119, 186,
+ 120, 190, 196, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 196, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 196, -1, -1,
+ -1, -1, 196, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 121, 211, 217, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 217, -1, -1, -1, -1,
+ -1, -1, 217, 217, 217, 217, 217, 217,
+ 217, 217, 217, 217, 217, 217, 217, 217,
+ 217, 217, 217, -1, 217, 217, 217, 217,
+ 217, 217, 217, -1, -1, -1, 217, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 122, 273, 123,
+ 282, 278, 124, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 52,
+ -1, -1, -1, -1, -1, -1, 28, 29,
+ 30, 31, 32, 33, 34, 35, 37, 38,
+ 39, 40, 41, 42, 43, 44, 45, -1,
+ 53, 47, 51, 50, 48, 46, 49, -1,
+ -1, -1, 36, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 216, -1, 218, 219, 220, 221,
+ 222, 223, 224, 225, 226, 227, 228, 229,
+ 230, 231, 232, 233, 234, 235, 236, 237,
+ 238, 239, 240, 241, 242, 243, 189, 125,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 207, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 126, -1, -1, -1, -1, 202,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 195, 197, 198, 199, 127, -1,
+ -1, 128, 129, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 52,
+ -1, -1, -1, -1, -1, -1, 28, 29,
+ 30, 31, 32, 33, 34, 35, 37, 38,
+ 39, 40, 41, 42, 43, 44, 45, -1,
+ 53, 47, 51, 50, 48, 46, 49, -1,
+ -1, -1, 36, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 216, -1, 218, 219, 220, 221,
+ 222, 223, 224, 225, 226, 227, 228, 229,
+ 230, 231, 232, 233, 234, 235, 236, 237,
+ 238, 239, 240, 241, 242, 243, 277, 272,
+ 194, 130, 204, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 204, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 131, 209, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 209, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 132, 215,
+ 200, 133, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 134, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 203, 135, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 136, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 208,
+ 201, 137, 206, 138, 205, 210,
+};
+
+unsigned short Parser_keys[] = { /* Per state: inclusive [low, high] range of element types; parseLangEl reads entries curs<<1 and (curs<<1)+1 and reports a parse error for types outside the pair. */
+ 129, 188, 185, 185, 47, 189, 47, 195,
+ 47, 207, 47, 196, 129, 129, 47, 47,
+ 47, 208, 47, 210, 131, 131, 47, 209,
+ 130, 130, 47, 47, 47, 206, 47, 206,
+ 47, 206, 47, 204, 47, 211, 180, 180,
+ 47, 47, 143, 143, 47, 266, 47, 205,
+ 47, 266, 47, 266, 47, 272, 184, 184,
+ 145, 145, 47, 47, 47, 47, 47, 47,
+ 47, 47, 47, 206, 47, 206, 47, 206,
+ 47, 47, 47, 47, 47, 47, 47, 47,
+ 47, 47, 47, 206, 47, 47, 47, 206,
+ 47, 47, 47, 47, 47, 47, 47, 47,
+ 47, 47, 47, 47, 47, 47, 47, 47,
+ 47, 47, 47, 206, 47, 267, 153, 153,
+ 47, 47, 181, 181, 182, 182, 136, 136,
+ 47, 47, 47, 47, 47, 220, 47, 223,
+ 47, 237, 47, 270, 150, 274, 47, 266,
+ 155, 155, 156, 156, 157, 157, 158, 158,
+ 47, 266, 47, 266, 47, 266, 162, 162,
+ 163, 163, 164, 164, 165, 165, 47, 266,
+ 167, 167, 47, 266, 169, 169, 170, 170,
+ 171, 171, 47, 268, 174, 174, 175, 175,
+ 176, 176, 177, 177, 178, 178, 179, 179,
+ 183, 183, 154, 154, 137, 137, 138, 138,
+ 47, 221, 47, 224, 47, 238, 47, 271,
+ 47, 274, 47, 47, 148, 148, 159, 159,
+ 160, 160, 161, 161, 166, 166, 168, 168,
+ 173, 173, 47, 206, 147, 147, 47, 47,
+ 132, 132, 47, 225, 139, 139, 47, 206,
+ 140, 140, 47, 47, 150, 150, 149, 149,
+ 47, 266, 171, 171, 47, 233, 47, 266,
+ 142, 142, 148, 148, 133, 133, 47, 47,
+ 47, 231, 47, 234, 141, 141, 146, 146,
+ 47, 232, 47, 235, 151, 151, 47, 47,
+ 134, 134, 47, 47, 152, 152, 135, 135,
+ 0, 0
+};
+
+unsigned int Parser_offsets[] = { /* Per state: base index into Parser_indicies; parseLangEl adds (element type - low key of the state) to it to locate the transition position. */
+ 0, 60, 61, 204, 353, 514, 664, 665,
+ 666, 828, 992, 993, 1156, 1157, 1158, 1318,
+ 1478, 1638, 1796, 1961, 1962, 1963, 1964, 2184,
+ 2343, 2563, 2783, 3009, 3010, 3011, 3012, 3013,
+ 3014, 3015, 3175, 3335, 3495, 3496, 3497, 3498,
+ 3499, 3500, 3660, 3661, 3821, 3822, 3823, 3824,
+ 3825, 3826, 3827, 3828, 3829, 3830, 3990, 4211,
+ 4212, 4213, 4214, 4215, 4216, 4217, 4218, 4392,
+ 4569, 4760, 4984, 5109, 5329, 5330, 5331, 5332,
+ 5333, 5553, 5773, 5993, 5994, 5995, 5996, 5997,
+ 6217, 6218, 6438, 6439, 6440, 6441, 6663, 6664,
+ 6665, 6666, 6667, 6668, 6669, 6670, 6671, 6672,
+ 6673, 6848, 7026, 7218, 7443, 7671, 7672, 7673,
+ 7674, 7675, 7676, 7677, 7678, 7679, 7839, 7840,
+ 7841, 7842, 8021, 8022, 8182, 8183, 8184, 8185,
+ 8186, 8406, 8407, 8594, 8814, 8815, 8816, 8817,
+ 8818, 9003, 9191, 9192, 9193, 9379, 9568, 9569,
+ 9570, 9571, 9572, 9573, 9574
+};
+
+unsigned short Parser_targs[] = { /* Target state for each transition position looked up in Parser_indicies; parseLangEl loads it into targState. */
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48,
+ 49, 50, 51, 52, 53, 54, 55, 56,
+ 57, 58, 59, 60, 61, 62, 63, 64,
+ 65, 66, 67, 68, 69, 70, 71, 72,
+ 73, 74, 75, 76, 77, 78, 79, 80,
+ 81, 82, 83, 84, 85, 86, 87, 88,
+ 89, 90, 91, 92, 93, 94, 95, 96,
+ 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112,
+ 113, 114, 115, 116, 117, 118, 119, 120,
+ 121, 122, 123, 124, 125, 126, 127, 128,
+ 129, 130, 131, 132, 133, 134, 135, 136,
+ 137, 138, 139, 140, 140, 140, 140, 140,
+ 140, 140, 140, 140, 140, 140, 140, 140,
+ 140, 140, 140, 140, 140, 140, 140, 140,
+ 140, 140, 140, 140, 140, 140, 140, 140,
+ 140, 140, 140, 140, 140, 140, 140, 140,
+ 140, 140, 140, 140, 140, 140, 140, 140,
+ 140, 140, 140, 140, 140, 140, 140, 140,
+ 140, 140, 140, 140, 140, 140, 140, 140,
+ 140, 140, 140, 140, 140, 140, 140, 140,
+ 140, 140, 140, 140, 140, 140, 140, 140,
+ 140, 140, 140, 140, 140, 140, 140, 140,
+ 140, 140, 140, 140, 140, 140, 140, 140,
+ 140, 140, 140, 140, 140, 140, 140, 140,
+ 140, 140, 140, 140, 140, 140, 140, 140,
+ 140, 140, 140, 140, 140, 140, 140, 140,
+ 140, 140, 140, 140, 140, 140, 140, 140,
+ 140, 140, 140, 140, 140, 140, 140, 140,
+ 140, 140, 140, 140, 140, 140, 140, 140,
+ 140, 140, 140, 140
+};
+
+unsigned int Parser_actInds[] = { /* For each transition position, the offset of its first word in Parser_actions (parseLangEl: action = Parser_actions + Parser_actInds[pos]). */
+ 0, 2, 4, 6, 8, 10, 12, 14,
+ 16, 18, 20, 22, 24, 26, 28, 30,
+ 32, 34, 36, 38, 40, 42, 44, 46,
+ 48, 50, 52, 54, 56, 58, 60, 62,
+ 64, 66, 68, 70, 72, 74, 76, 78,
+ 80, 82, 84, 86, 88, 90, 92, 94,
+ 96, 98, 100, 102, 104, 106, 108, 110,
+ 112, 114, 116, 118, 120, 122, 124, 126,
+ 128, 130, 132, 134, 136, 138, 140, 142,
+ 144, 146, 148, 150, 152, 154, 156, 158,
+ 160, 162, 164, 166, 168, 170, 172, 174,
+ 176, 178, 180, 182, 184, 186, 188, 190,
+ 192, 194, 196, 198, 200, 202, 204, 206,
+ 208, 210, 212, 214, 216, 218, 220, 222,
+ 224, 226, 228, 230, 232, 234, 236, 238,
+ 240, 242, 244, 246, 248, 250, 252, 254,
+ 256, 258, 260, 262, 264, 266, 268, 270,
+ 272, 274, 276, 278, 280, 282, 284, 286,
+ 288, 290, 292, 294, 296, 298, 300, 302,
+ 304, 306, 308, 310, 312, 314, 316, 318,
+ 320, 322, 324, 326, 328, 330, 332, 334,
+ 336, 338, 340, 342, 344, 346, 348, 350,
+ 352, 354, 356, 358, 360, 362, 364, 366,
+ 368, 370, 372, 374, 376, 378, 380, 382,
+ 384, 386, 388, 390, 392, 394, 396, 398,
+ 400, 402, 404, 406, 408, 410, 412, 414,
+ 416, 418, 420, 422, 424, 426, 428, 430,
+ 432, 434, 436, 438, 440, 442, 444, 446,
+ 448, 450, 452, 454, 456, 458, 460, 462,
+ 464, 466, 468, 470, 472, 474, 476, 478,
+ 480, 482, 484, 486, 488, 490, 492, 494,
+ 496, 498, 500, 502, 504, 506, 508, 510,
+ 512, 514, 516, 518, 520, 522, 524, 526,
+ 528, 530, 532, 534, 536, 538, 540, 542,
+ 544, 546, 548, 550, 552, 554, 556, 558,
+ 560, 562, 564, 566
+};
+
+unsigned int Parser_actions[] = { /* Action words selected through Parser_actInds. parseLangEl shifts when bit 0x1 of the selected word is set and, while shifting, records a retry if the following word is nonzero. */
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 3, 0,
+ 6, 0, 11, 0, 15, 0, 19, 0,
+ 22, 0, 27, 0, 30, 0, 35, 0,
+ 39, 0, 43, 0, 47, 0, 51, 0,
+ 55, 0, 58, 0, 63, 0, 67, 0,
+ 71, 0, 75, 0, 79, 0, 83, 0,
+ 87, 0, 91, 0, 94, 0, 99, 0,
+ 103, 0, 107, 0, 111, 0, 115, 0,
+ 119, 0, 123, 0, 127, 0, 130, 0,
+ 135, 0, 139, 0, 143, 0, 147, 0,
+ 150, 0, 155, 0, 159, 0, 163, 0,
+ 167, 0, 171, 0, 175, 0, 179, 0,
+ 183, 0, 187, 0, 191, 0, 195, 0,
+ 198, 0, 203, 0, 207, 0, 211, 0,
+ 215, 0, 218, 0, 223, 0, 227, 0,
+ 230, 0, 235, 0, 239, 0, 243, 0,
+ 247, 0, 251, 0, 255, 0, 259, 0,
+ 262, 0, 267, 0, 271, 0, 275, 0,
+ 279, 0, 282, 0, 287, 0, 291, 0,
+ 295, 0, 299, 0, 302, 0, 307, 0,
+ 311, 0, 314, 0, 319, 0, 323, 0,
+ 327, 0, 331, 0, 335, 0, 339, 0,
+ 343, 0, 347, 0, 351, 0, 355, 0,
+ 359, 0, 363, 0, 367, 0, 371, 0,
+ 375, 0, 379, 0, 383, 0, 387, 0,
+ 391, 0, 395, 0, 399, 0, 403, 0,
+ 407, 0, 411, 0, 415, 0, 419, 0,
+ 423, 0, 427, 0, 431, 0, 435, 0,
+ 439, 0, 443, 0, 447, 0, 451, 0,
+ 455, 0, 459, 0, 463, 0, 467, 0,
+ 471, 0, 475, 0, 479, 0, 483, 0,
+ 487, 0, 491, 0, 495, 0, 499, 0,
+ 503, 0, 507, 0, 511, 0, 515, 0,
+ 519, 0, 523, 0, 527, 0, 530, 0,
+ 535, 0, 539, 0, 543, 0, 547, 0,
+ 550, 0, 555, 0, 559, 0, 563, 0,
+ 567, 0, 571, 0, 575, 0, 1, 0
+}; /* NOTE(review): how the bits above 0x1 are decoded is not shown in this part of the file -- confirm against the rest of parseLangEl. */
+
+int Parser_commitLen[] = { /* Per transition position: 0 means no commit; a nonzero value makes parseLangEl run a commit of that length, and a negative value starts it one element below stackTop. */
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 2
+};
+
+unsigned int Parser_fssProdIdIndex[] = { /* NOTE(review): not referenced in the part of the parser shown here; presumably maps a production id to its row in Parser_fssProdLengths -- confirm. Every entry equals its own index. */
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 65, 66, 67, 68, 69, 70, 71,
+ 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83, 84, 85, 86, 87,
+ 88, 89, 90, 91, 92, 93, 94, 95,
+ 96, 97, 98, 99, 100, 101, 102, 103,
+ 104, 105, 106, 107, 108, 109, 110, 111,
+ 112, 113, 114, 115, 116, 117, 118, 119,
+ 120, 121, 122, 123, 124, 125, 126, 127,
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144
+};
+
+char Parser_fssProdLengths[] = { /* Indexed by a LangEl's reduction field in parseLangEl; presumably the number of right-hand-side elements of that production -- confirm. */
+ 1, 0, 5, 1, 2, 0, 2, 0,
+ 1, 1, 3, 4, 1, 2, 0, 1,
+ 1, 1, 1, 1, 1, 4, 2, 0,
+ 3, 3, 4, 4, 4, 4, 1, 2,
+ 0, 3, 4, 1, 2, 0, 1, 1,
+ 1, 1, 1, 1, 1, 3, 3, 4,
+ 2, 0, 3, 4, 1, 2, 0, 4,
+ 2, 0, 1, 1, 1, 3, 4, 1,
+ 2, 0, 3, 4, 1, 2, 0, 3,
+ 4, 1, 2, 0, 4, 2, 0, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 3, 3, 3, 3, 4, 4, 4,
+ 3, 3, 3, 3, 3, 4, 3, 4,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 4, 4, 2, 0, 4, 4, 1,
+ 2, 0, 3, 4, 1, 2, 1, 3,
+ 1
+};
+
+unsigned short Parser_prodLhsIds[] = { /* One entry per production, in the same order as Parser_prodNames; values line up with Parser_lelNames indices (entry 0 is 187, "start", for "start-1"). Presumably the left-hand-side element of each production -- confirm. */
+ 187, 187, 186, 188, 189, 189, 190, 190,
+ 192, 192, 193, 191, 195, 196, 196, 197,
+ 197, 197, 197, 197, 197, 202, 204, 204,
+ 205, 198, 199, 200, 201, 194, 207, 208,
+ 208, 209, 203, 210, 211, 211, 212, 212,
+ 212, 212, 212, 212, 212, 213, 214, 215,
+ 220, 220, 221, 216, 222, 223, 223, 224,
+ 225, 225, 226, 226, 226, 227, 228, 230,
+ 231, 231, 232, 229, 233, 234, 234, 235,
+ 217, 236, 237, 237, 238, 206, 206, 239,
+ 239, 239, 239, 239, 239, 239, 239, 239,
+ 239, 239, 239, 239, 239, 239, 239, 239,
+ 239, 239, 239, 239, 239, 239, 239, 239,
+ 239, 241, 242, 243, 244, 245, 246, 247,
+ 248, 249, 250, 251, 252, 253, 254, 255,
+ 256, 257, 258, 259, 260, 261, 262, 263,
+ 264, 265, 266, 267, 267, 268, 218, 269,
+ 270, 270, 271, 219, 272, 273, 273, 274,
+ 275
+};
+
+const char *Parser_prodNames[] = { /* Printable name of each production, written as <symbol name>-<alternative number>; not referenced in the part of the parser shown here. */
+ "start-1",
+ "start-2",
+ "tag_ragel-1",
+ "tag_ragel_head-1",
+ "ragel_def_list-1",
+ "ragel_def_list-2",
+ "host_or_write_list-1",
+ "host_or_write_list-2",
+ "host_or_write-1",
+ "host_or_write-2",
+ "tag_host-1",
+ "ragel_def-1",
+ "tag_ragel_def_head-1",
+ "ragel_def_item_list-1",
+ "ragel_def_item_list-2",
+ "ragel_def_item-1",
+ "ragel_def_item-2",
+ "ragel_def_item-3",
+ "ragel_def_item-4",
+ "ragel_def_item-5",
+ "ragel_def_item-6",
+ "tag_export_list-1",
+ "export_list-1",
+ "export_list-2",
+ "tag_export-1",
+ "tag_alph_type-1",
+ "tag_getkey_expr-1",
+ "tag_access_expr-1",
+ "tag_curstate_expr-1",
+ "tag_write-1",
+ "tag_write_head-1",
+ "write_option_list-1",
+ "write_option_list-2",
+ "tag_arg-1",
+ "tag_machine-1",
+ "tag_machine_head-1",
+ "machine_item_list-1",
+ "machine_item_list-2",
+ "machine_item-1",
+ "machine_item-2",
+ "machine_item-3",
+ "machine_item-4",
+ "machine_item-5",
+ "machine_item-6",
+ "machine_item-7",
+ "tag_start_state-1",
+ "tag_error_state-1",
+ "tag_entry_points-1",
+ "entry_point_list-1",
+ "entry_point_list-2",
+ "tag_entry-1",
+ "tag_state_list-1",
+ "tag_state_list_head-1",
+ "state_list-1",
+ "state_list-2",
+ "tag_state-1",
+ "state_item_list-1",
+ "state_item_list-2",
+ "state_item-1",
+ "state_item-2",
+ "state_item-3",
+ "tag_state_actions-1",
+ "tag_state_cond_list-1",
+ "tag_state_cond_list_head-1",
+ "state_cond_list-1",
+ "state_cond_list-2",
+ "state_cond-1",
+ "tag_trans_list-1",
+ "tag_trans_list_head-1",
+ "trans_list-1",
+ "trans_list-2",
+ "tag_trans-1",
+ "tag_action_list-1",
+ "tag_action_list_head-1",
+ "action_list-1",
+ "action_list-2",
+ "tag_action-1",
+ "inline_list-1",
+ "inline_list-2",
+ "inline_item-1",
+ "inline_item-2",
+ "inline_item-3",
+ "inline_item-4",
+ "inline_item-5",
+ "inline_item-6",
+ "inline_item-7",
+ "inline_item-8",
+ "inline_item-9",
+ "inline_item-10",
+ "inline_item-11",
+ "inline_item-12",
+ "inline_item-13",
+ "inline_item-14",
+ "inline_item-15",
+ "inline_item-16",
+ "inline_item-17",
+ "inline_item-18",
+ "inline_item-19",
+ "inline_item-20",
+ "inline_item-21",
+ "inline_item-22",
+ "inline_item-23",
+ "inline_item-24",
+ "inline_item-25",
+ "inline_item-26",
+ "tag_text-1",
+ "tag_goto-1",
+ "tag_call-1",
+ "tag_next-1",
+ "tag_goto_expr-1",
+ "tag_call_expr-1",
+ "tag_next_expr-1",
+ "tag_ret-1",
+ "tag_break-1",
+ "tag_pchar-1",
+ "tag_char-1",
+ "tag_hold-1",
+ "tag_exec-1",
+ "tag_holdte-1",
+ "tag_execte-1",
+ "tag_curs-1",
+ "tag_targs-1",
+ "tag_il_entry-1",
+ "tag_init_tokstart-1",
+ "tag_init_act-1",
+ "tag_get_tokend-1",
+ "tag_set_tokstart-1",
+ "tag_set_tokend-1",
+ "tag_set_act-1",
+ "tag_sub_action-1",
+ "tag_lm_switch-1",
+ "lm_action_list-1",
+ "lm_action_list-2",
+ "tag_inline_action-1",
+ "tag_action_table_list-1",
+ "tag_action_table_list_head-1",
+ "action_table_list-1",
+ "action_table_list-2",
+ "tag_action_table-1",
+ "tag_cond_space_list-1",
+ "tag_cond_space_list_head-1",
+ "cond_space_list-1",
+ "cond_space_list-2",
+ "tag_cond_space-1",
+ "_start-1"
+};
+
+const char *Parser_lelNames[] = { /* Printable name per element type; parseLangEl indexes it with lel->type in its LOG_ACTIONS output. Indices 0-127 stand for character codes, followed by the TAG_* tokens, _eof, and the grammar symbols. */
+ "D-0",
+ "D-1",
+ "D-2",
+ "D-3",
+ "D-4",
+ "D-5",
+ "D-6",
+ "D-7",
+ "D-8",
+ "D-9",
+ "D-10",
+ "D-11",
+ "D-12",
+ "D-13",
+ "D-14",
+ "D-15",
+ "D-16",
+ "D-17",
+ "D-18",
+ "D-19",
+ "D-20",
+ "D-21",
+ "D-22",
+ "D-23",
+ "D-24",
+ "D-25",
+ "D-26",
+ "D-27",
+ "D-28",
+ "D-29",
+ "D-30",
+ "D-31",
+ "D-32",
+ "!",
+ "\"",
+ "#",
+ "$",
+ "%",
+ "&",
+ "'",
+ "(",
+ ")",
+ "*",
+ "+",
+ ",",
+ "-",
+ ".",
+ "/",
+ "0",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ "6",
+ "7",
+ "8",
+ "9",
+ ":",
+ ";",
+ "<",
+ "=",
+ ">",
+ "?",
+ "@",
+ "A",
+ "B",
+ "C",
+ "D",
+ "E",
+ "F",
+ "G",
+ "H",
+ "I",
+ "J",
+ "K",
+ "L",
+ "M",
+ "N",
+ "O",
+ "P",
+ "Q",
+ "R",
+ "S",
+ "T",
+ "U",
+ "V",
+ "W",
+ "X",
+ "Y",
+ "Z",
+ "[",
+ "\\",
+ "]",
+ "^",
+ "_",
+ "`",
+ "a",
+ "b",
+ "c",
+ "d",
+ "e",
+ "f",
+ "g",
+ "h",
+ "i",
+ "j",
+ "k",
+ "l",
+ "m",
+ "n",
+ "o",
+ "p",
+ "q",
+ "r",
+ "s",
+ "t",
+ "u",
+ "v",
+ "w",
+ "x",
+ "y",
+ "z",
+ "{",
+ "|",
+ "}",
+ "~",
+ "D-127",
+ "TAG_unknown",
+ "TAG_ragel",
+ "TAG_ragel_def",
+ "TAG_host",
+ "TAG_state_list",
+ "TAG_state",
+ "TAG_trans_list",
+ "TAG_t",
+ "TAG_machine",
+ "TAG_start_state",
+ "TAG_error_state",
+ "TAG_action_list",
+ "TAG_action_table_list",
+ "TAG_action",
+ "TAG_action_table",
+ "TAG_alphtype",
+ "TAG_element",
+ "TAG_getkey",
+ "TAG_state_actions",
+ "TAG_entry_points",
+ "TAG_sub_action",
+ "TAG_cond_space_list",
+ "TAG_cond_space",
+ "TAG_cond_list",
+ "TAG_c",
+ "TAG_exports",
+ "TAG_ex",
+ "TAG_text",
+ "TAG_goto",
+ "TAG_call",
+ "TAG_next",
+ "TAG_goto_expr",
+ "TAG_call_expr",
+ "TAG_next_expr",
+ "TAG_ret",
+ "TAG_pchar",
+ "TAG_char",
+ "TAG_hold",
+ "TAG_exec",
+ "TAG_holdte",
+ "TAG_execte",
+ "TAG_curs",
+ "TAG_targs",
+ "TAG_entry",
+ "TAG_data",
+ "TAG_lm_switch",
+ "TAG_init_act",
+ "TAG_set_act",
+ "TAG_set_tokend",
+ "TAG_get_tokend",
+ "TAG_init_tokstart",
+ "TAG_set_tokstart",
+ "TAG_write",
+ "TAG_curstate",
+ "TAG_access",
+ "TAG_break",
+ "TAG_arg",
+ "_eof",
+ "tag_ragel",
+ "start",
+ "tag_ragel_head",
+ "ragel_def_list",
+ "host_or_write_list",
+ "ragel_def",
+ "host_or_write",
+ "tag_host",
+ "tag_write",
+ "tag_ragel_def_head",
+ "ragel_def_item_list",
+ "ragel_def_item",
+ "tag_alph_type",
+ "tag_getkey_expr",
+ "tag_access_expr",
+ "tag_curstate_expr",
+ "tag_export_list",
+ "tag_machine",
+ "export_list",
+ "tag_export",
+ "inline_list",
+ "tag_write_head",
+ "write_option_list",
+ "tag_arg",
+ "tag_machine_head",
+ "machine_item_list",
+ "machine_item",
+ "tag_start_state",
+ "tag_error_state",
+ "tag_entry_points",
+ "tag_state_list",
+ "tag_action_list",
+ "tag_action_table_list",
+ "tag_cond_space_list",
+ "entry_point_list",
+ "tag_entry",
+ "tag_state_list_head",
+ "state_list",
+ "tag_state",
+ "state_item_list",
+ "state_item",
+ "tag_state_actions",
+ "tag_state_cond_list",
+ "tag_trans_list",
+ "tag_state_cond_list_head",
+ "state_cond_list",
+ "state_cond",
+ "tag_trans_list_head",
+ "trans_list",
+ "tag_trans",
+ "tag_action_list_head",
+ "action_list",
+ "tag_action",
+ "inline_item",
+ "inline_item_type",
+ "tag_text",
+ "tag_goto",
+ "tag_call",
+ "tag_next",
+ "tag_goto_expr",
+ "tag_call_expr",
+ "tag_next_expr",
+ "tag_ret",
+ "tag_break",
+ "tag_pchar",
+ "tag_char",
+ "tag_hold",
+ "tag_exec",
+ "tag_holdte",
+ "tag_execte",
+ "tag_curs",
+ "tag_targs",
+ "tag_il_entry",
+ "tag_init_tokstart",
+ "tag_init_act",
+ "tag_get_tokend",
+ "tag_set_tokstart",
+ "tag_set_tokend",
+ "tag_set_act",
+ "tag_sub_action",
+ "tag_lm_switch",
+ "lm_action_list",
+ "tag_inline_action",
+ "tag_action_table_list_head",
+ "action_table_list",
+ "tag_action_table",
+ "tag_cond_space_list_head",
+ "cond_space_list",
+ "tag_cond_space",
+ "_start"
+};
+
+#line 851 "xmlparse.kl"
+
+
+void Parser::init() /* Puts the parser in its start state, allocates the first block of 8128 language elements and installs element 0 as the initial stack top. */
+{
+ #line 2079 "xmlparse.cpp"
+ curs = Parser_startState;
+ pool = 0; /* parseLangEl takes elements from pool when it is non-null, otherwise from the fresh block; start with none pooled. */
+ freshEl = (struct Parser_LangEl*) malloc( sizeof(struct Parser_LangEl)*8128); /* NOTE(review): the malloc result is not checked before it is dereferenced below. */
+ #ifdef LOG_ACTIONS
+ cerr << "allocating 8128 LangEls" << endl;
+ #endif
+ stackTop = freshEl; /* Slot 0 of the fresh block serves as the first stack element. */
+ stackTop->type = 0;
+ stackTop->state = -1;
+ stackTop->next = 0;
+ stackTop->child = 0;
+ freshPos = 1; /* Slot 0 is already in use, so the next fresh element is slot 1. */
+ lastFinal = stackTop;
+ numRetry = 0; /* Counters that parseLangEl updates while parsing all start from zero. */
+ numNodes = 0;
+ errCount = 0;
+#line 856 "xmlparse.kl"
+}
+
+int Parser::parseLangEl( int type, const Token *token )
+{
+ #line 2101 "xmlparse.cpp"
+#define reject() induceReject = 1
+
+ int pos, targState;
+ unsigned int *action;
+ int rhsLen;
+ struct Parser_LangEl *rhs[32];
+ struct Parser_LangEl *lel;
+ struct Parser_LangEl *input;
+ char induceReject;
+
+ if ( curs < 0 )
+ return 0;
+
+ if ( pool == 0 ) {
+ if ( freshPos == 8128 ) {
+ freshEl = (struct Parser_LangEl*) malloc(
+ sizeof(struct Parser_LangEl)*8128);
+ #ifdef LOG_ACTIONS
+ cerr << "allocating 8128 LangEls" << endl;
+ #endif
+ freshPos = 0;
+ }
+ input = freshEl + freshPos++;
+ }
+ else {
+ input = pool;
+ pool = pool->next;
+ }
+ numNodes += 1;
+ input->type = type;
+ input->user.token = *token;
+ input->next = 0;
+ input->retry = 0;
+ input->child = 0;
+
+again:
+ if ( input == 0 )
+ goto _out;
+
+ lel = input;
+ if ( lel->type < Parser_keys[curs<<1] || lel->type > Parser_keys[(curs<<1)+1] )
+ goto parseError;
+
+ pos = Parser_indicies[Parser_offsets[curs] + (lel->type - Parser_keys[curs<<1])];
+ if ( pos < 0 )
+ goto parseError;
+
+ induceReject = 0;
+ targState = Parser_targs[pos];
+ action = Parser_actions + Parser_actInds[pos];
+ if ( lel->retry & 0x0000ffff )
+ action += (lel->retry & 0x0000ffff);
+
+ if ( *action & 0x1 ) {
+ #ifdef LOG_ACTIONS
+ cerr << "shifted: " << Parser_lelNames[lel->type];
+ #endif
+ input = input->next;
+ lel->state = curs;
+ lel->next = stackTop;
+ stackTop = lel;
+
+ if ( action[1] == 0 )
+ lel->retry &= 0xffff0000;
+ else {
+ lel->retry += 1;
+ numRetry += 1;
+ #ifdef LOG_ACTIONS
+ cerr << " retry: " << stackTop;
+ #endif
+ }
+ #ifdef LOG_ACTIONS
+ cerr << endl;
+ #endif
+ }
+
+ if ( Parser_commitLen[pos] != 0 ) {
+ struct Parser_LangEl *commitHead = stackTop;
+ int absCommitLen = Parser_commitLen[pos];
+
+ #ifdef LOG_ACTIONS
+ cerr << "running commit of length: " << Parser_commitLen[pos] << endl;
+ #endif
+
+ if ( absCommitLen < 0 ) {
+ commitHead = commitHead->next;
+ absCommitLen = -1 * absCommitLen;
+ }
+ {
+ struct Parser_LangEl *lel = commitHead;
+ struct Parser_LangEl **cmStack = (struct Parser_LangEl**) malloc( sizeof(struct Parser_LangEl) * numNodes);
+ int n = absCommitLen, depth = 0, sp = 0;
+
+commit_head:
+ if ( lel->retry > 0 ) {
+ if ( lel->retry & 0x0000ffff )
+ numRetry -= 1;
+ if ( lel->retry & 0xffff0000 )
+ numRetry -= 1;
+ lel->retry = 0;
+ }
+
+ /* If depth is > 0 then move over lel freely, otherwise, make
+ * sure that we have not already done n steps down the line. */
+ if ( lel->next != 0 && ( depth > 0 || n > 1 ) ) {
+ cmStack[sp++] = lel;
+ lel = lel->next;
+
+ /* If we are at the top level count the steps down the line. */
+ if ( depth == 0 )
+ n -= 1;
+ goto commit_head;
+ }
+
+commit_reverse:
+ if ( lel->child != 0 ) {
+ cmStack[sp++] = lel;
+ lel = lel->child;
+
+ /* When we move down we need to increment the depth. */
+ depth += 1;
+ goto commit_head;
+ }
+
+commit_upwards:
+ if ( sp > 0 ) {
+ /* Figure out which place to return to. */
+ if ( cmStack[sp-1]->next == lel ) {
+ lel = cmStack[--sp];
+ goto commit_reverse;
+ }
+ else {
+ /* Going back up, adjust the depth. */
+ lel = cmStack[--sp];
+ depth -= 1;
+ goto commit_upwards;
+ }
+ }
+ free( cmStack );
+ }
+ if ( numRetry == 0 ) {
+ #ifdef LOG_ACTIONS
+ cerr << "number of retries is zero, "
+ "executing final actions" << endl;
+ #endif
+ {
+ struct Parser_LangEl *lel = commitHead;
+ struct Parser_LangEl **cmStack = (struct Parser_LangEl**) malloc( sizeof( struct Parser_LangEl) * numNodes);
+ int sp = 0;
+ char doExec = 0;
+
+final_head:
+ if ( lel == lastFinal ) {
+ doExec = 1;
+ goto hit_final;
+ }
+
+ if ( lel->next != 0 ) {
+ cmStack[sp++] = lel;
+ lel = lel->next;
+ goto final_head;
+ }
+
+final_reverse:
+
+ if ( lel->child != 0 ) {
+ cmStack[sp++] = lel;
+ lel = lel->child;
+ goto final_head;
+ }
+
+final_upwards:
+
+ if ( doExec ) {
+{
+ if ( lel->type < 186 ) {
+ }
+ else {
+ struct Parser_LangEl *redLel = lel;
+ if ( redLel->child != 0 ) {
+ int r = Parser_fssProdLengths[redLel->reduction] - 1;
+ struct Parser_LangEl *rhsEl = redLel->child;
+ while ( rhsEl != 0 ) {
+ rhs[r--] = rhsEl;
+ rhsEl = rhsEl->next;
+ }
+ }
+switch ( lel->reduction ) {
+case 1: {
+#line 46 "xmlparse.kl"
+
+ /* If we get no input the assumption is that the frontend died and
+ * emitted an error. */
+ errCount += 1;
+
+
+#line 2297 "xmlparse.cpp"
+} break;
+case 3: {
+#line 55 "xmlparse.kl"
+
+ Attribute *fileNameAttr = (&rhs[0]->user.token)->tag->findAttr( "filename" );
+ if ( fileNameAttr == 0 ) {
+ error((&rhs[0]->user.token)->loc) << "tag <ragel> requires a filename attribute" << endl;
+ exit(1);
+ }
+ else {
+ sourceFileName = fileNameAttr->value;
+
+ Attribute *langAttr = (&rhs[0]->user.token)->tag->findAttr( "lang" );
+ if ( langAttr == 0 )
+ error((&rhs[0]->user.token)->loc) << "tag <ragel> requires a lang attribute" << endl;
+ else {
+ if ( strcmp( langAttr->value, "C" ) == 0 ) {
+ hostLangType = CCode;
+ hostLang = &hostLangC;
+ }
+ else if ( strcmp( langAttr->value, "D" ) == 0 ) {
+ hostLangType = DCode;
+ hostLang = &hostLangD;
+ }
+ else if ( strcmp( langAttr->value, "Java" ) == 0 ) {
+ hostLangType = JavaCode;
+ hostLang = &hostLangJava;
+ }
+ else if ( strcmp( langAttr->value, "Ruby" ) == 0 ) {
+ hostLangType = RubyCode;
+ hostLang = &hostLangRuby;
+ }
+ else {
+ error((&rhs[0]->user.token)->loc) << "expecting lang attribute to be "
+ "one of C, D, Java or Ruby" << endl;
+ }
+
+ outStream = openOutput( sourceFileName );
+ }
+ }
+
+
+#line 2340 "xmlparse.cpp"
+} break;
+case 10: {
+#line 105 "xmlparse.kl"
+
+ Attribute *lineAttr = (&rhs[0]->user.token)->tag->findAttr( "line" );
+ if ( lineAttr == 0 )
+ error((&rhs[0]->user.token)->loc) << "tag <host> requires a line attribute" << endl;
+ else {
+ int line = atoi( lineAttr->value );
+ if ( outputActive )
+ lineDirective( *outStream, sourceFileName, line );
+ }
+
+ if ( outputActive )
+ *outStream << (&rhs[2]->user.token)->tag->content;
+
+
+#line 2358 "xmlparse.cpp"
+} break;
+case 11: {
+#line 121 "xmlparse.kl"
+
+ /* Do this before distributing transitions out to singles and defaults
+ * makes life easier. */
+ cgd->redFsm->maxKey = cgd->findMaxKey();
+
+ cgd->redFsm->assignActionLocs();
+
+ /* Find the first final state (The final state with the lowest id). */
+ cgd->redFsm->findFirstFinState();
+
+ /* Call the user's callback. */
+ cgd->finishRagelDef();
+
+
+#line 2376 "xmlparse.cpp"
+} break;
+case 12: {
+#line 136 "xmlparse.kl"
+
+ char *fsmName = 0;
+ Attribute *nameAttr = (&rhs[0]->user.token)->tag->findAttr( "name" );
+ if ( nameAttr != 0 ) {
+ fsmName = nameAttr->value;
+
+ CodeGenMapEl *mapEl = codeGenMap.find( fsmName );
+ if ( mapEl != 0 )
+ cgd = mapEl->value;
+ else {
+ cgd = makeCodeGen( sourceFileName, fsmName, *outStream, wantComplete );
+ codeGenMap.insert( fsmName, cgd );
+ }
+ }
+ else {
+ cgd = makeCodeGen( sourceFileName, fsmName,
+ *outStream, wantComplete );
+ }
+
+ ::keyOps = &cgd->thisKeyOps;
+
+
+#line 2402 "xmlparse.cpp"
+} break;
+case 24: {
+#line 174 "xmlparse.kl"
+
+ Attribute *nameAttr = (&rhs[0]->user.token)->tag->findAttr( "name" );
+ if ( nameAttr == 0 )
+ error((&rhs[0]->user.token)->loc) << "tag <ex> requires a name attribute" << endl;
+ else {
+ char *td = (&rhs[2]->user.token)->tag->content;
+ Key exportKey = readKey( td, &td );
+ cgd->exportList.append( new Export( nameAttr->value, exportKey ) );
+ }
+
+
+#line 2417 "xmlparse.cpp"
+} break;
+case 25: {
+#line 186 "xmlparse.kl"
+
+ if ( ! cgd->setAlphType( (&rhs[2]->user.token)->tag->content ) )
+ error((&rhs[0]->user.token)->loc) << "tag <alphtype> specifies unknown alphabet type" << endl;
+
+
+#line 2426 "xmlparse.cpp"
+} break;
+case 26: {
+#line 192 "xmlparse.kl"
+
+ cgd->getKeyExpr = (&rhs[1]->user.inline_list)->inlineList;
+
+
+#line 2434 "xmlparse.cpp"
+} break;
+case 27: {
+#line 197 "xmlparse.kl"
+
+ cgd->accessExpr = (&rhs[1]->user.inline_list)->inlineList;
+
+
+#line 2442 "xmlparse.cpp"
+} break;
+case 28: {
+#line 202 "xmlparse.kl"
+
+ cgd->curStateExpr = (&rhs[1]->user.inline_list)->inlineList;
+
+
+#line 2450 "xmlparse.cpp"
+} break;
+case 29: {
+#line 207 "xmlparse.kl"
+
+ /* Terminate the options list and call the write statement handler. */
+ writeOptions.append(0);
+ cgd->writeStatement( (&rhs[0]->user.tag_write_head)->loc, writeOptions.length()-1, writeOptions.data );
+
+ /* CodeGenData may have issued an error. */
+ errCount += cgd->codeGenErrCount;
+
+ /* Clear the options in prep for the next write statement. */
+ writeOptions.empty();
+
+
+#line 2466 "xmlparse.cpp"
+} break;
+case 30: {
+#line 225 "xmlparse.kl"
+
+ Attribute *nameAttr = (&rhs[0]->user.token)->tag->findAttr( "def_name" );
+ Attribute *lineAttr = (&rhs[0]->user.token)->tag->findAttr( "line" );
+ Attribute *colAttr = (&rhs[0]->user.token)->tag->findAttr( "col" );
+
+ if ( nameAttr == 0 )
+ error((&rhs[0]->user.token)->loc) << "tag <write> requires a def_name attribute" << endl;
+ if ( lineAttr == 0 )
+ error((&rhs[0]->user.token)->loc) << "tag <write> requires a line attribute" << endl;
+ if ( colAttr == 0 )
+ error((&rhs[0]->user.token)->loc) << "tag <write> requires a col attribute" << endl;
+
+ if ( nameAttr != 0 && lineAttr != 0 && colAttr != 0 ) {
+ CodeGenMapEl *mapEl = codeGenMap.find( nameAttr->value );
+ if ( mapEl == 0 )
+ error((&rhs[0]->user.token)->loc) << "internal error: cannot find codeGen" << endl;
+ else {
+ cgd = mapEl->value;
+ ::keyOps = &cgd->thisKeyOps;
+ }
+
+ (&redLel->user.tag_write_head)->loc.line = atoi(lineAttr->value);
+ (&redLel->user.tag_write_head)->loc.col = atoi(colAttr->value);
+ }
+
+
+#line 2496 "xmlparse.cpp"
+} break;
+case 33: {
+#line 261 "xmlparse.kl"
+
+ writeOptions.append( (&rhs[2]->user.token)->tag->content );
+
+
+#line 2504 "xmlparse.cpp"
+} break;
+case 34: {
+#line 266 "xmlparse.kl"
+
+ cgd->closeMachine();
+
+
+#line 2512 "xmlparse.cpp"
+} break;
+case 35: {
+#line 271 "xmlparse.kl"
+
+ cgd->createMachine();
+
+
+#line 2520 "xmlparse.cpp"
+} break;
+case 45: {
+#line 291 "xmlparse.kl"
+
+ unsigned long startState = strtoul( (&rhs[2]->user.token)->tag->content, 0, 10 );
+ cgd->setStartState( startState );
+
+
+#line 2529 "xmlparse.cpp"
+} break;
+case 46: {
+#line 297 "xmlparse.kl"
+
+ unsigned long errorState = strtoul( (&rhs[2]->user.token)->tag->content, 0, 10 );
+ cgd->setErrorState( errorState );
+
+
+#line 2538 "xmlparse.cpp"
+} break;
+case 47: {
+#line 303 "xmlparse.kl"
+
+ Attribute *errorAttr = (&rhs[0]->user.token)->tag->findAttr( "error" );
+ if ( errorAttr != 0 )
+ cgd->setForcedErrorState();
+
+
+#line 2548 "xmlparse.cpp"
+} break;
+case 50: {
+#line 313 "xmlparse.kl"
+
+ Attribute *nameAttr = (&rhs[0]->user.token)->tag->findAttr( "name" );
+ if ( nameAttr == 0 ) {
+ error((&rhs[0]->user.token)->loc) << "tag <entry_points>::<entry> "
+ "requires a name attribute" << endl;
+ }
+ else {
+ char *data = (&rhs[2]->user.token)->tag->content;
+ unsigned long entry = strtoul( data, &data, 10 );
+ cgd->addEntryPoint( nameAttr->value, entry );
+ }
+
+
+#line 2565 "xmlparse.cpp"
+} break;
+case 52: {
+#line 329 "xmlparse.kl"
+
+ Attribute *lengthAttr = (&rhs[0]->user.token)->tag->findAttr( "length" );
+ if ( lengthAttr == 0 )
+ error((&rhs[0]->user.token)->loc) << "tag <state_list> requires a length attribute" << endl;
+ else {
+ unsigned long length = strtoul( lengthAttr->value, 0, 10 );
+ cgd->initStateList( length );
+ curState = 0;
+ }
+
+
+#line 2580 "xmlparse.cpp"
+} break;
+case 55: {
+#line 344 "xmlparse.kl"
+
+ Attribute *idAttr = (&rhs[0]->user.token)->tag->findAttr( "id" );
+ if ( idAttr == 0 )
+ error((&rhs[0]->user.token)->loc) << "tag <state> requires an id attribute" << endl;
+ else {
+ int id = atoi( idAttr->value );
+ cgd->setId( curState, id );
+ }
+
+ Attribute *lengthAttr = (&rhs[0]->user.token)->tag->findAttr( "final" );
+ if ( lengthAttr != 0 )
+ cgd->setFinal( curState );
+ curState += 1;
+
+
+#line 2599 "xmlparse.cpp"
+} break;
+case 61: {
+#line 367 "xmlparse.kl"
+
+ char *ad = (&rhs[2]->user.token)->tag->content;
+
+ long toStateAction = readOffsetPtr( ad, &ad );
+ long fromStateAction = readOffsetPtr( ad, &ad );
+ long eofAction = readOffsetPtr( ad, &ad );
+
+ cgd->setStateActions( curState, toStateAction,
+ fromStateAction, eofAction );
+
+
+#line 2614 "xmlparse.cpp"
+} break;
+case 63: {
+#line 381 "xmlparse.kl"
+
+ Attribute *lengthAttr = (&rhs[0]->user.token)->tag->findAttr( "length" );
+ if ( lengthAttr == 0 )
+ error((&rhs[0]->user.token)->loc) << "tag <cond_list> requires a length attribute" << endl;
+ else {
+ ulong length = readLength( lengthAttr->value );
+ cgd->initStateCondList( curState, length );
+ curStateCond = 0;
+ }
+
+
+#line 2629 "xmlparse.cpp"
+} break;
+case 66: {
+#line 396 "xmlparse.kl"
+
+ char *td = (&rhs[2]->user.token)->tag->content;
+ Key lowKey = readKey( td, &td );
+ Key highKey = readKey( td, &td );
+ long condId = readOffsetPtr( td, &td );
+ cgd->addStateCond( curState, lowKey, highKey, condId );
+
+
+#line 2641 "xmlparse.cpp"
+} break;
+case 67: {
+#line 405 "xmlparse.kl"
+
+ cgd->finishTransList( curState );
+
+
+#line 2649 "xmlparse.cpp"
+} break;
+case 68: {
+#line 410 "xmlparse.kl"
+
+ Attribute *lengthAttr = (&rhs[0]->user.token)->tag->findAttr( "length" );
+ if ( lengthAttr == 0 )
+ error((&rhs[0]->user.token)->loc) << "tag <trans_list> requires a length attribute" << endl;
+ else {
+ unsigned long length = strtoul( lengthAttr->value, 0, 10 );
+ cgd->initTransList( curState, length );
+ curTrans = 0;
+ }
+
+
+#line 2664 "xmlparse.cpp"
+} break;
+case 71: {
+#line 425 "xmlparse.kl"
+
+ char *td = (&rhs[2]->user.token)->tag->content;
+ Key lowKey = readKey( td, &td );
+ Key highKey = readKey( td, &td );
+ long targ = readOffsetPtr( td, &td );
+ long action = readOffsetPtr( td, &td );
+
+ cgd->newTrans( curState, curTrans++, lowKey, highKey, targ, action );
+
+
+#line 2678 "xmlparse.cpp"
+} break;
+case 73: {
+#line 442 "xmlparse.kl"
+
+ Attribute *lengthAttr = (&rhs[0]->user.token)->tag->findAttr( "length" );
+ if ( lengthAttr == 0 )
+ error((&rhs[0]->user.token)->loc) << "tag <action_list> requires a length attribute" << endl;
+ else {
+ unsigned long length = strtoul( lengthAttr->value, 0, 10 );
+ cgd->initActionList( length );
+ curAction = 0;
+ }
+
+
+#line 2693 "xmlparse.cpp"
+} break;
+case 76: {
+#line 461 "xmlparse.kl"
+
+ Attribute *lineAttr = (&rhs[0]->user.token)->tag->findAttr( "line" );
+ Attribute *colAttr = (&rhs[0]->user.token)->tag->findAttr( "col" );
+ Attribute *nameAttr = (&rhs[0]->user.token)->tag->findAttr( "name" );
+ if ( lineAttr == 0 || colAttr == 0)
+ error((&rhs[0]->user.token)->loc) << "tag <action> requires a line and col attributes" << endl;
+ else {
+ unsigned long line = strtoul( lineAttr->value, 0, 10 );
+ unsigned long col = strtoul( colAttr->value, 0, 10 );
+
+ char *name = 0;
+ if ( nameAttr != 0 )
+ name = nameAttr->value;
+
+ cgd->newAction( curAction++, name, line, col, (&rhs[1]->user.inline_list)->inlineList );
+ }
+
+
+#line 2715 "xmlparse.cpp"
+} break;
+case 77: {
+#line 486 "xmlparse.kl"
+
+ /* Append the item to the list, return the list. */
+ (&rhs[0]->user.inline_list)->inlineList->append( (&rhs[1]->user.inline_item_type)->inlineItem );
+ (&redLel->user.inline_list)->inlineList = (&rhs[0]->user.inline_list)->inlineList;
+
+
+#line 2725 "xmlparse.cpp"
+} break;
+case 78: {
+#line 493 "xmlparse.kl"
+
+ /* Start with empty list. */
+ (&redLel->user.inline_list)->inlineList = new InlineList;
+
+
+#line 2734 "xmlparse.cpp"
+} break;
+case 79: {
+#line 505 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2740 "xmlparse.cpp"
+} break;
+case 80: {
+#line 506 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2746 "xmlparse.cpp"
+} break;
+case 81: {
+#line 507 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2752 "xmlparse.cpp"
+} break;
+case 82: {
+#line 508 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2758 "xmlparse.cpp"
+} break;
+case 83: {
+#line 509 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2764 "xmlparse.cpp"
+} break;
+case 84: {
+#line 510 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2770 "xmlparse.cpp"
+} break;
+case 85: {
+#line 511 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2776 "xmlparse.cpp"
+} break;
+case 86: {
+#line 512 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2782 "xmlparse.cpp"
+} break;
+case 87: {
+#line 513 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2788 "xmlparse.cpp"
+} break;
+case 88: {
+#line 514 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2794 "xmlparse.cpp"
+} break;
+case 89: {
+#line 515 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2800 "xmlparse.cpp"
+} break;
+case 90: {
+#line 516 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2806 "xmlparse.cpp"
+} break;
+case 91: {
+#line 517 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2812 "xmlparse.cpp"
+} break;
+case 92: {
+#line 518 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2818 "xmlparse.cpp"
+} break;
+case 93: {
+#line 519 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2824 "xmlparse.cpp"
+} break;
+case 94: {
+#line 520 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2830 "xmlparse.cpp"
+} break;
+case 95: {
+#line 521 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2836 "xmlparse.cpp"
+} break;
+case 96: {
+#line 522 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2842 "xmlparse.cpp"
+} break;
+case 97: {
+#line 523 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2848 "xmlparse.cpp"
+} break;
+case 98: {
+#line 524 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2854 "xmlparse.cpp"
+} break;
+case 99: {
+#line 525 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2860 "xmlparse.cpp"
+} break;
+case 100: {
+#line 526 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2866 "xmlparse.cpp"
+} break;
+case 101: {
+#line 527 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2872 "xmlparse.cpp"
+} break;
+case 102: {
+#line 528 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2878 "xmlparse.cpp"
+} break;
+case 103: {
+#line 529 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2884 "xmlparse.cpp"
+} break;
+case 104: {
+#line 530 "xmlparse.kl"
+ (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem;
+
+#line 2890 "xmlparse.cpp"
+} break;
+case 105: {
+#line 560 "xmlparse.kl"
+
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::Text );
+ (&redLel->user.inline_item_type)->inlineItem->data = (&rhs[2]->user.token)->tag->content;
+
+
+#line 2899 "xmlparse.cpp"
+} break;
+case 106: {
+#line 566 "xmlparse.kl"
+
+ int targ = strtol( (&rhs[2]->user.token)->tag->content, 0, 10 );
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::Goto );
+ (&redLel->user.inline_item_type)->inlineItem->targId = targ;
+
+
+#line 2909 "xmlparse.cpp"
+} break;
+case 107: {
+#line 573 "xmlparse.kl"
+
+ int targ = strtol( (&rhs[2]->user.token)->tag->content, 0, 10 );
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::Call );
+ (&redLel->user.inline_item_type)->inlineItem->targId = targ;
+
+
+#line 2919 "xmlparse.cpp"
+} break;
+case 108: {
+#line 580 "xmlparse.kl"
+
+ int targ = strtol( (&rhs[2]->user.token)->tag->content, 0, 10 );
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::Next );
+ (&redLel->user.inline_item_type)->inlineItem->targId = targ;
+
+
+#line 2929 "xmlparse.cpp"
+} break;
+case 109: {
+#line 587 "xmlparse.kl"
+
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::GotoExpr );
+ (&redLel->user.inline_item_type)->inlineItem->children = (&rhs[1]->user.inline_list)->inlineList;
+
+
+#line 2938 "xmlparse.cpp"
+} break;
+case 110: {
+#line 593 "xmlparse.kl"
+
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::CallExpr );
+ (&redLel->user.inline_item_type)->inlineItem->children = (&rhs[1]->user.inline_list)->inlineList;
+
+
+#line 2947 "xmlparse.cpp"
+} break;
+case 111: {
+#line 599 "xmlparse.kl"
+
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::NextExpr );
+ (&redLel->user.inline_item_type)->inlineItem->children = (&rhs[1]->user.inline_list)->inlineList;
+
+
+#line 2956 "xmlparse.cpp"
+} break;
+case 112: {
+#line 605 "xmlparse.kl"
+
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::Ret );
+
+
+#line 2964 "xmlparse.cpp"
+} break;
+case 113: {
+#line 610 "xmlparse.kl"
+
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::Break );
+
+
+#line 2972 "xmlparse.cpp"
+} break;
+case 114: {
+#line 615 "xmlparse.kl"
+
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::PChar );
+
+
+#line 2980 "xmlparse.cpp"
+} break;
+case 115: {
+#line 620 "xmlparse.kl"
+
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::Char );
+
+
+#line 2988 "xmlparse.cpp"
+} break;
+case 116: {
+#line 625 "xmlparse.kl"
+
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::Hold );
+
+
+#line 2996 "xmlparse.cpp"
+} break;
+case 117: {
+#line 630 "xmlparse.kl"
+
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::Exec );
+ (&redLel->user.inline_item_type)->inlineItem->children = (&rhs[1]->user.inline_list)->inlineList;
+
+
+#line 3005 "xmlparse.cpp"
+} break;
+case 118: {
+#line 636 "xmlparse.kl"
+
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::HoldTE );
+
+
+#line 3013 "xmlparse.cpp"
+} break;
+case 119: {
+#line 641 "xmlparse.kl"
+
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::ExecTE );
+ (&redLel->user.inline_item_type)->inlineItem->children = (&rhs[1]->user.inline_list)->inlineList;
+
+
+#line 3022 "xmlparse.cpp"
+} break;
+case 120: {
+#line 647 "xmlparse.kl"
+
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::Curs );
+
+
+#line 3030 "xmlparse.cpp"
+} break;
+case 121: {
+#line 652 "xmlparse.kl"
+
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::Targs );
+
+
+#line 3038 "xmlparse.cpp"
+} break;
+case 122: {
+#line 657 "xmlparse.kl"
+
+ int targ = strtol( (&rhs[2]->user.token)->tag->content, 0, 10 );
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::Entry );
+ (&redLel->user.inline_item_type)->inlineItem->targId = targ;
+
+
+#line 3048 "xmlparse.cpp"
+} break;
+case 123: {
+#line 664 "xmlparse.kl"
+
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::LmInitTokStart );
+
+
+#line 3056 "xmlparse.cpp"
+} break;
+case 124: {
+#line 669 "xmlparse.kl"
+
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::LmInitAct );
+
+
+#line 3064 "xmlparse.cpp"
+} break;
+case 125: {
+#line 674 "xmlparse.kl"
+
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::LmGetTokEnd );
+
+
+#line 3072 "xmlparse.cpp"
+} break;
+case 126: {
+#line 679 "xmlparse.kl"
+
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::LmSetTokStart );
+ cgd->hasLongestMatch = true;
+
+
+#line 3081 "xmlparse.cpp"
+} break;
+case 127: {
+#line 685 "xmlparse.kl"
+
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::LmSetTokEnd );
+ (&redLel->user.inline_item_type)->inlineItem->offset = strtol( (&rhs[2]->user.token)->tag->content, 0, 10 );
+
+
+#line 3090 "xmlparse.cpp"
+} break;
+case 128: {
+#line 691 "xmlparse.kl"
+
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::LmSetActId );
+ (&redLel->user.inline_item_type)->inlineItem->lmId = strtol( (&rhs[2]->user.token)->tag->content, 0, 10 );
+
+
+#line 3099 "xmlparse.cpp"
+} break;
+case 129: {
+#line 697 "xmlparse.kl"
+
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::SubAction );
+ (&redLel->user.inline_item_type)->inlineItem->children = (&rhs[1]->user.inline_list)->inlineList;
+
+
+#line 3108 "xmlparse.cpp"
+} break;
+case 130: {
+#line 704 "xmlparse.kl"
+
+ bool handlesError = false;
+ Attribute *handlesErrorAttr = (&rhs[0]->user.token)->tag->findAttr( "handles_error" );
+ if ( handlesErrorAttr != 0 )
+ handlesError = true;
+
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::LmSwitch );
+ (&redLel->user.inline_item_type)->inlineItem->children = (&rhs[1]->user.lm_action_list)->inlineList;
+ (&redLel->user.inline_item_type)->inlineItem->handlesError = handlesError;
+
+
+#line 3123 "xmlparse.cpp"
+} break;
+case 131: {
+#line 721 "xmlparse.kl"
+
+ (&redLel->user.lm_action_list)->inlineList = (&rhs[0]->user.lm_action_list)->inlineList;
+ (&redLel->user.lm_action_list)->inlineList->append( (&rhs[1]->user.inline_item_type)->inlineItem );
+
+
+#line 3132 "xmlparse.cpp"
+} break;
+case 132: {
+#line 726 "xmlparse.kl"
+
+ (&redLel->user.lm_action_list)->inlineList = new InlineList;
+
+
+#line 3140 "xmlparse.cpp"
+} break;
+case 133: {
+#line 733 "xmlparse.kl"
+
+ (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::SubAction );
+ (&redLel->user.inline_item_type)->inlineItem->children = (&rhs[1]->user.inline_list)->inlineList;
+
+ Attribute *idAttr = (&rhs[0]->user.token)->tag->findAttr( "id" );
+ if ( idAttr != 0 ) {
+ unsigned long id = strtoul( idAttr->value, 0, 10 );
+ (&redLel->user.inline_item_type)->inlineItem->lmId = id;
+ }
+
+
+#line 3155 "xmlparse.cpp"
+} break;
+case 135: {
+#line 752 "xmlparse.kl"
+
+ Attribute *lengthAttr = (&rhs[0]->user.token)->tag->findAttr( "length" );
+ if ( lengthAttr == 0 ) {
+ error((&rhs[0]->user.token)->loc) << "tag <action_table_list> requires "
+ "a length attribute" << endl;
+ }
+ else {
+ unsigned long length = strtoul( lengthAttr->value, 0, 10 );
+ cgd->initActionTableList( length );
+ curActionTable = 0;
+ }
+
+
+#line 3172 "xmlparse.cpp"
+} break;
+case 138: {
+#line 769 "xmlparse.kl"
+
+	/* Reduction action for an <at> (action table) tag: rhs[0] is the opening
+	 * tag carrying the "length" attribute, rhs[2] carries the tag content, a
+	 * whitespace-separated list of decimal action ids.
+	 * NOTE: this file is generated by Kelbt; the same change belongs in
+	 * xmlparse.kl or it is lost on regeneration. */
+
+	/* Find the length of the action table. */
+	Attribute *lengthAttr = (&rhs[0]->user.token)->tag->findAttr( "length" );
+	if ( lengthAttr == 0 )
+		error((&rhs[0]->user.token)->loc) << "tag <at> requires a length attribute" << endl;
+	else {
+		unsigned long length = strtoul( lengthAttr->value, 0, 10 );
+
+		/* Collect the action table. */
+		RedAction *redAct = cgd->allActionTables + curActionTable;
+		redAct->actListId = curActionTable;
+		redAct->key.setAsNew( length );
+		char *ptr = (&rhs[2]->user.token)->tag->content;
+		int pos = 0;
+		/* NOTE(review): pos is not checked against length; presumably the
+		 * frontend always emits exactly `length` ids -- confirm. */
+		while ( *ptr != 0 ) {
+			char *next = ptr;
+			unsigned long actionId = strtoul( ptr, &next, 10 );
+
+			/* strtoul leaves the end pointer at its input when it finds no
+			 * digits (e.g. trailing whitespace). Without this check the loop
+			 * would never advance and would keep writing into redAct->key. */
+			if ( next == ptr )
+				break;
+
+			redAct->key[pos].key = 0;
+			redAct->key[pos].value = cgd->allActions+actionId;
+			pos += 1;
+			ptr = next;
+		}
+
+		/* Insert into the action table map. */
+		cgd->redFsm->actionMap.insert( redAct );
+	}
+
+	/* Advance even when the tag was rejected so later tables keep their slot. */
+	curActionTable += 1;
+
+
+#line 3204 "xmlparse.cpp"
+} break;
+case 140: {
+#line 804 "xmlparse.kl"
+
+ Attribute *lengthAttr = (&rhs[0]->user.token)->tag->findAttr( "length" );
+ if ( lengthAttr == 0 ) {
+ error((&rhs[0]->user.token)->loc) << "tag <cond_space_list> "
+ "requires a length attribute" << endl;
+ }
+ else {
+ ulong length = readLength( lengthAttr->value );
+ cgd->initCondSpaceList( length );
+ curCondSpace = 0;
+ }
+
+
+#line 3221 "xmlparse.cpp"
+} break;
+case 143: {
+#line 821 "xmlparse.kl"
+
+	/* Reduction action for a <cond_space> tag: rhs[0] is the opening tag with
+	 * the "length" and "id" attributes, rhs[2] carries the content, a base key
+	 * followed by `length` action offsets.
+	 * NOTE: this file is generated by Kelbt; the same change belongs in
+	 * xmlparse.kl or it is lost on regeneration. */
+	Attribute *lengthAttr = (&rhs[0]->user.token)->tag->findAttr( "length" );
+	Attribute *idAttr = (&rhs[0]->user.token)->tag->findAttr( "id" );
+	if ( lengthAttr == 0 )
+		error((&rhs[0]->user.token)->loc) << "tag <cond_space> requires a length attribute" << endl;
+	else {
+		/* This must test idAttr: the previous code re-tested lengthAttr, so a
+		 * missing id attribute fell through to idAttr->value on a null
+		 * pointer and the diagnostic below could never be printed. */
+		if ( idAttr == 0 )
+			error((&rhs[0]->user.token)->loc) << "tag <cond_space> requires an id attribute" << endl;
+		else {
+			unsigned long condSpaceId = strtoul( idAttr->value, 0, 10 );
+			ulong length = readLength( lengthAttr->value );
+
+			/* The first field of the content is the base key ... */
+			char *td = (&rhs[2]->user.token)->tag->content;
+			Key baseKey = readKey( td, &td );
+
+			/* ... followed by one action offset per condition. */
+			cgd->newCondSpace( curCondSpace, condSpaceId, baseKey );
+			for ( ulong a = 0; a < length; a++ ) {
+				long actionOffset = readOffsetPtr( td, &td );
+				cgd->condSpaceItem( curCondSpace, actionOffset );
+			}
+			curCondSpace += 1;
+		}
+	}
+
+
+#line 3250 "xmlparse.cpp"
+} break;
+}
+ }
+}
+
+ if ( lel->child != 0 ) {
+ struct Parser_LangEl *first = lel->child;
+ struct Parser_LangEl *child = lel->child;
+ numNodes -= 1;
+ lel->child = 0;
+ while ( child->next != 0 ) {
+ child = child->next;
+ numNodes -= 1;
+ }
+ child->next = pool;
+ pool = first;
+ }
+ }
+
+hit_final:
+ if ( sp > 0 ) {
+ /* Figure out which place to return to. */
+ if ( cmStack[sp-1]->next == lel ) {
+ lel = cmStack[--sp];
+ goto final_reverse;
+ }
+ else {
+ lel = cmStack[--sp];
+ goto final_upwards;
+ }
+ }
+
+ lastFinal = lel;
+ free( cmStack );
+ }
+ }
+ }
+
+ if ( *action & 0x2 ) {
+ int fssRed = *action >> 2;
+ int reduction = Parser_fssProdIdIndex[fssRed];
+ struct Parser_LangEl *redLel;
+ if ( pool == 0 ) {
+ if ( freshPos == 8128 ) {
+ freshEl = (struct Parser_LangEl*) malloc(
+ sizeof(struct Parser_LangEl)*8128);
+ #ifdef LOG_ACTIONS
+ cerr << "allocating 8128 LangEls" << endl;
+ #endif
+ freshPos = 0;
+ }
+ redLel = freshEl + freshPos++;
+ }
+ else {
+ redLel = pool;
+ pool = pool->next;
+ }
+ numNodes += 1;
+ redLel->type = Parser_prodLhsIds[reduction];
+ redLel->reduction = reduction;
+ redLel->child = 0;
+ redLel->next = 0;
+ redLel->retry = (lel->retry << 16);
+ lel->retry &= 0xffff0000;
+
+ rhsLen = Parser_fssProdLengths[fssRed];
+ if ( rhsLen > 0 ) {
+ int r;
+ for ( r = rhsLen-1; r > 0; r-- ) {
+ rhs[r] = stackTop;
+ stackTop = stackTop->next;
+ }
+ rhs[0] = stackTop;
+ stackTop = stackTop->next;
+ rhs[0]->next = 0;
+ }
+ #ifdef LOG_ACTIONS
+ cerr << "reduced: "
+ << Parser_prodNames[reduction]
+ << " rhsLen: " << rhsLen;
+ #endif
+ if ( action[1] == 0 )
+ redLel->retry = 0;
+ else {
+ redLel->retry += 0x10000;
+ numRetry += 1;
+ #ifdef LOG_ACTIONS
+ cerr << " retry: " << redLel;
+ #endif
+ }
+
+ #ifdef LOG_ACTIONS
+ cerr << endl;
+ #endif
+
+ if ( rhsLen == 0 ) {
+ redLel->file = lel->file;
+ redLel->line = lel->line;
+ targState = curs;
+ }
+ else {
+ redLel->child = rhs[rhsLen-1];
+ redLel->file = rhs[0]->file;
+ redLel->line = rhs[0]->line;
+ targState = rhs[0]->state;
+ }
+
+ if ( induceReject ) {
+ #ifdef LOG_ACTIONS
+ cerr << "error induced during reduction of " <<
+ Parser_lelNames[redLel->type] << endl;
+ #endif
+ redLel->state = curs;
+ redLel->next = stackTop;
+ stackTop = redLel;
+ curs = targState;
+ goto parseError;
+ }
+ else {
+ redLel->next = input;
+ input = redLel;
+ }
+ }
+
+
+ curs = targState;
+ goto again;
+
+parseError:
+ #ifdef LOG_BACKTRACK
+ cerr << "hit error" << endl;
+ #endif
+ if ( numRetry > 0 ) {
+ while ( 1 ) {
+ struct Parser_LangEl *redLel = stackTop;
+ if ( stackTop->type < 186 ) {
+ #ifdef LOG_BACKTRACK
+ cerr << "backing up over terminal: " <<
+ Parser_lelNames[stackTop->type] << endl;
+ #endif
+ stackTop = stackTop->next;
+ redLel->next = input;
+ input = redLel;
+ }
+ else {
+ #ifdef LOG_BACKTRACK
+ cerr << "backing up over non-terminal: " <<
+ Parser_lelNames[stackTop->type] << endl;
+ #endif
+ stackTop = stackTop->next;
+ struct Parser_LangEl *first = redLel->child;
+ if ( first == 0 )
+ rhsLen = 0;
+ else {
+ rhsLen = 1;
+ while ( first->next != 0 ) {
+ first = first->next;
+ rhsLen += 1;
+ }
+ first->next = stackTop;
+ stackTop = redLel->child;
+
+ struct Parser_LangEl *rhsEl = stackTop;
+ int p = rhsLen;
+ while ( p > 0 ) {
+ rhs[--p] = rhsEl;
+ rhsEl = rhsEl->next;
+ }
+ }
+ redLel->next = pool;
+ pool = redLel;
+ numNodes -= 1;
+ }
+
+ if ( redLel->retry > 0 ) {
+ #ifdef LOG_BACKTRACK
+ cerr << "found retry targ: " << redLel << endl;
+ #endif
+ numRetry -= 1;
+ #ifdef LOG_BACKTRACK
+ cerr << "found retry: " << redLel << endl;
+ #endif
+ if ( redLel->retry & 0x0000ffff )
+ curs = input->state;
+ else {
+ input->retry = redLel->retry >> 16;
+ if ( stackTop->state < 0 )
+ curs = Parser_startState;
+ else {
+ curs = Parser_targs[(int)Parser_indicies[Parser_offsets[stackTop->state] + (stackTop->type - Parser_keys[stackTop->state<<1])]];
+ }
+ }
+ goto again;
+ }
+ }
+ }
+ curs = -1;
+ errCount += 1;
+_out: {}
+#line 861 "xmlparse.kl"
+ return errCount == 0 ? 0 : -1;
+}
+
+
+/* Parse the leading base-10 unsigned number of td and return it. The
+ * position where parsing stopped is not reported to the caller. */
+unsigned long readLength( char *td )
+{
+	const unsigned long length = strtoul( td, 0, 10 );
+	return length;
+}
+
+/* Parse one base-10 alphabet key from td. The global keyOps decides whether
+ * the text is read as a signed or an unsigned number; *end receives the
+ * position just past the parsed text (as set by strtol/strtoul). */
+Key readKey( char *td, char **end )
+{
+	if ( ! keyOps->isSigned )
+		return Key( strtoul( td, end, 10 ) );
+
+	return Key( strtol( td, end, 10 ) );
+}
+
+/* Read one offset field from td. Leading spaces and tabs are skipped. A
+ * field consisting of the character 'x' yields -1 and, when end is non-null,
+ * stores the position just past the 'x' in *end. Anything else is handed to
+ * strtol in base 10, which also sets *end. */
+long readOffsetPtr( char *td, char **end )
+{
+	char *cur = td;
+	for ( ; *cur == ' ' || *cur == '\t'; ++cur ) {
+		/* Skip blanks only; strtol handles any other whitespace itself. */
+	}
+
+	if ( *cur != 'x' )
+		return strtol( cur, end, 10 );
+
+	if ( end != 0 )
+		*end = cur + 1;
+	return -1;
+}
+
+/* Write the "file:line:col: warning: " prefix to cerr and hand the stream
+ * back so the caller can append the message. Does not touch errCount. */
+ostream &Parser::warning( const InputLoc &loc )
+{
+	ostream &out = cerr;
+	out << fileName << ":" << loc.line << ":" << loc.col << ": warning: ";
+	return out;
+}
+
+ostream &Parser::error( const InputLoc &loc )
+{
+ errCount += 1;
+ assert( fileName != 0 );
+ cerr << fileName << ":" << loc.line << ":" << loc.col << ": ";
+ return cerr;
+}
+
+
+ostream &Parser::parser_error( int tokId, Token &token )
+{
+ errCount += 1;
+ assert( fileName != 0 );
+ cerr << fileName << ":" << token.loc.line << ":" << token.loc.col;
+ if ( token.tag != 0 ) {
+ if ( token.tag->tagId == 0 )
+ cerr << ": at unknown tag";
+ else
+ cerr << ": at tag <" << token.tag->tagId->name << ">";
+ }
+ cerr << ": ";
+
+ return cerr;
+}
+
+/* Feed one token to the parser. A negative result from parseLangEl is
+ * fatal: a "parse error" diagnostic is printed and the process exits with
+ * status 1. Otherwise the parser's result is returned. */
+int Parser::token( int tokenId, Token &tok )
+{
+	const int status = parseLangEl( tokenId, &tok );
+	if ( status < 0 ) {
+		parser_error( tokenId, tok ) << "parse error" << endl;
+		exit(1);
+	}
+
+	return status;
+}
+
+/* Build a tag-less token at the given location and feed it to the parser
+ * through token( int, Token& ). */
+int Parser::token( int tokenId, int col, int line )
+{
+	Token bare;
+	bare.tag = 0;
+	bare.loc.line = line;
+	bare.loc.col = col;
+
+	return token( tokenId, bare );
+}
+
+/* Feed an XML tag to the parser. A closing tag is announced by sending a
+ * '/' token first; then the tag's own id is sent, or TAG_unknown when the
+ * tag has no tagId. Returns the result of the last token() call made. */
+int Parser::token( XMLTag *tag, int col, int line )
+{
+	Token tagTok;
+	tagTok.tag = tag;
+	tagTok.loc.line = line;
+	tagTok.loc.col = col;
+
+	if ( tag->type == XMLTag::Close ) {
+		const int closeRes = token( '/', tagTok );
+		if ( closeRes < 0 )
+			return closeRes;
+	}
+
+	/* Re-attach the tag before sending the tag token itself. */
+	tagTok.tag = tag;
+
+	if ( tag->tagId == 0 )
+		return token( TAG_unknown, tagTok );
+	return token( tag->tagId->id, tagTok );
+}
diff --git a/contrib/tools/ragel5/redfsm/xmlparse.h b/contrib/tools/ragel5/redfsm/xmlparse.h
new file mode 100644
index 0000000000..b51a7cd67a
--- /dev/null
+++ b/contrib/tools/ragel5/redfsm/xmlparse.h
@@ -0,0 +1,228 @@
+/* Automatically generated by Kelbt from "xmlparse.kh".
+ *
+ * Parts of this file are copied from Kelbt source covered by the GNU
+ * GPL. As a special exception, you may use the parts of this file copied
+ * from Kelbt source without restriction. The remainder is derived from
+ * "xmlparse.kh" and inherits the copyright status of that file.
+ */
+
+#line 1 "xmlparse.kh"
+/*
+ * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef _XMLPARSE_H
+#define _XMLPARSE_H
+
+#include "vector.h"
+#include "gendata.h"
+#include <iostream>
+
+using std::ostream;
+
+struct AttrMarker /* Start/length record of one attribute as marked by the scanner; no text is copied. */
+{
+ char *id; /* start of the attribute name (scanner sets it from attr_id_start) */
+ int idLen; /* length of the name */
+ char *value; /* start of the value span; xml_parse() skips its first char as the opening quote */
+ int valueLen; /* length of the value span; xml_parse() copies valueLen-2 chars to drop both quotes */
+};
+
+struct Attribute /* One name/value pair attached to an XMLTag. */
+{
+ char *id; /* attribute name; xml_parse() stores a NUL-terminated new[] copy here */
+ char *value; /* attribute value without the surrounding quotes, also a NUL-terminated new[] copy */
+};
+
+typedef Vector<AttrMarker> AttrMkList;
+typedef Vector<Attribute> AttrList;
+struct XMLTagHashPair;
+
+struct XMLTag
+{
+    enum TagType { Open, Close };
+
+    /* A new tag starts out with neither content nor an attribute list. */
+    XMLTag( XMLTagHashPair *tagId, TagType type ) :
+        tagId(tagId), type(type),
+        content(0), attrList(0) {}
+
+    /* Find an attribute by exact name. Yields 0 when the tag has no
+     * attribute list or none of its attributes has that name. */
+    Attribute *findAttr(const char *id )
+    {
+        if ( attrList == 0 )
+            return 0;
+
+        AttrList::Iter cur = *attrList;
+        while ( cur.lte() ) {
+            if ( strcmp( id, cur->id ) == 0 )
+                return cur;
+            cur++;
+        }
+        return 0;
+    }
+
+    XMLTagHashPair *tagId;
+    TagType type;
+
+    /* Content is associated with closing tags. */
+    char *content;
+
+    /* Attribute lists are associated with opening tags. */
+    AttrList *attrList;
+};
+
+
+struct XMLTagHashPair /* Entry type of the gperf word list: tag name paired with its TAG_* id. */
+{
+ const char *name; /* tag name as written in the XML, e.g. "write" */
+ int id; /* matching TAG_* constant, e.g. TAG_write */
+};
+
+struct Token /* Unit handed to Parser::parseLangEl(). */
+{
+ XMLTag *tag; /* tag behind this token; 0 for tokens built by token( tokenId, col, line ) */
+ InputLoc loc; /* line/col used when reporting errors for this token */
+};
+
+struct InlineItem;
+struct InlineList;
+
+struct LmSwitchVect;
+struct LmSwitchAction;
+
+struct Parser /* Kelbt-generated parser state plus the hand-written driver data from xmlparse.kh. */
+{
+ #line 117 "xmlparse.kh"
+
+
+ #line 111 "xmlparse.h"
+ struct Parser_LangEl *freshEl;
+ int freshPos;
+ struct Parser_LangEl *pool;
+ int numRetry;
+ int numNodes;
+ struct Parser_LangEl *stackTop;
+ struct Parser_LangEl *lastFinal;
+ int errCount; /* incremented by error( loc ) and parser_error() on every call */
+ int curs;
+#line 120 "xmlparse.kh"
+
+ void init();
+ int parseLangEl( int type, const Token *token ); /* token() treats a negative result as a parse error */
+
+ Parser(const char *fileName, bool outputActive, bool wantComplete ) :
+ fileName(fileName), sourceFileName(0), outStream(0),
+ outputActive(outputActive), wantComplete(wantComplete),
+ cgd(0) { } /* NOTE(review): other members are not set here; presumably init() covers the generated ones, confirm */
+
+ int token( int tokenId, Token &token ); /* feeds one token; prints "parse error" and exits on failure */
+ int token( int tokenId, int col, int line ); /* same, for a token with no tag attached */
+ int token( XMLTag *tag, int col, int line ); /* closing tags are sent as '/' followed by the tag token */
+
+ /* Report an error encountered by the parser. */
+ ostream &warning( const InputLoc &loc );
+ ostream &error();
+ ostream &error( const InputLoc &loc );
+ ostream &parser_error( int tokId, Token &token );
+
+ /* The name of the root section, this does not change during an include. */
+ const char *fileName;
+ char *sourceFileName;
+ ostream *outStream;
+ bool outputActive;
+ bool wantComplete;
+
+ /* Collected during parsing. */
+ char *attrKey;
+ char *attrValue;
+ int curAction;
+ int curActionTable;
+ int curTrans;
+ int curState;
+ int curCondSpace;
+ int curStateCond;
+
+ CodeGenData *cgd;
+ CodeGenMap codeGenMap;
+
+ Vector <char*> writeOptions;
+};
+
+#line 164 "xmlparse.h"
+#define TAG_unknown 128
+#define TAG_ragel 129
+#define TAG_ragel_def 130
+#define TAG_host 131
+#define TAG_state_list 132
+#define TAG_state 133
+#define TAG_trans_list 134
+#define TAG_t 135
+#define TAG_machine 136
+#define TAG_start_state 137
+#define TAG_error_state 138
+#define TAG_action_list 139
+#define TAG_action_table_list 140
+#define TAG_action 141
+#define TAG_action_table 142
+#define TAG_alphtype 143
+#define TAG_element 144
+#define TAG_getkey 145
+#define TAG_state_actions 146
+#define TAG_entry_points 147
+#define TAG_sub_action 148
+#define TAG_cond_space_list 149
+#define TAG_cond_space 150
+#define TAG_cond_list 151
+#define TAG_c 152
+#define TAG_exports 153
+#define TAG_ex 154
+#define TAG_text 155
+#define TAG_goto 156
+#define TAG_call 157
+#define TAG_next 158
+#define TAG_goto_expr 159
+#define TAG_call_expr 160
+#define TAG_next_expr 161
+#define TAG_ret 162
+#define TAG_pchar 163
+#define TAG_char 164
+#define TAG_hold 165
+#define TAG_exec 166
+#define TAG_holdte 167
+#define TAG_execte 168
+#define TAG_curs 169
+#define TAG_targs 170
+#define TAG_entry 171
+#define TAG_data 172
+#define TAG_lm_switch 173
+#define TAG_init_act 174
+#define TAG_set_act 175
+#define TAG_set_tokend 176
+#define TAG_get_tokend 177
+#define TAG_init_tokstart 178
+#define TAG_set_tokstart 179
+#define TAG_write 180
+#define TAG_curstate 181
+#define TAG_access 182
+#define TAG_break 183
+#define TAG_arg 184
+#define _eof 185
+
+#line 163 "xmlparse.kh"
+
+int xml_parse( std::istream &input, const char *fileName,
+ bool outputActive, bool wantComplete );
+
+#endif /* _XMLPARSE_H */
diff --git a/contrib/tools/ragel5/redfsm/xmlscan.cpp b/contrib/tools/ragel5/redfsm/xmlscan.cpp
new file mode 100644
index 0000000000..a3d979a0ff
--- /dev/null
+++ b/contrib/tools/ragel5/redfsm/xmlscan.cpp
@@ -0,0 +1,925 @@
+#line 1 "xmlscan.rl"
+/*
+ * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <iostream>
+#include <string.h>
+#include "vector.h"
+#include "xmlparse.h"
+#include "buffer.h"
+
+using std::istream;
+using std::cout;
+using std::cerr;
+using std::endl;
+
+#define BUFSIZE 4096
+
+
+#line 37 "xmlscan.cpp"
+static const int Scanner_start = 20;
+
+static const int Scanner_first_final = 20;
+
+static const int Scanner_error = 0;
+
+#line 37 "xmlscan.rl"
+
+#include "phash.h"
+
+/* Buffered scanner that turns the XML input stream into open-tag, close-tag
+ * and EOF tokens for xml_parse(). */
+struct Scanner
+{
+    /* Every scalar member gets a defined starting value. scan() adjusts
+     * tag_id_start when it compacts the buffer, which can happen before any
+     * tag has assigned it, so it must not be left indeterminate. */
+    Scanner(const char *fileName, istream &input ) :
+        fileName(fileName),
+        input(input),
+        have(0),
+        curline(1),
+        curcol(1),
+        p(0), pe(0),
+        done(false),
+        data(0), data_len(0),
+        value(0),
+        tag_id_start(0), tag_id_len(0),
+        token_col(0), token_line(0)
+    {
+
+#line 69 "xmlscan.cpp"
+    {
+        cs = Scanner_start;
+        tokstart = 0;
+        tokend = 0;
+        act = 0;
+    }
+#line 63 "xmlscan.rl"
+
+    }
+
+    int scan();
+    void adjustAttrPointers( int distance );
+    std::ostream &error();
+
+    const char *fileName;
+    istream &input;
+
+    /* Scanner State. */
+    int cs, act, have, curline, curcol;
+    char *tokstart, *tokend;
+    char *p, *pe;
+    int done;
+
+    /* Token data */
+    char *data;
+    int data_len;
+    int value;
+    AttrMkList attrMkList;
+    Buffer buffer;
+    char *tag_id_start;
+    int tag_id_len;
+    int token_col, token_line;
+
+    /* Raw input window that p, pe, tokstart and the attribute markers point into. */
+    char buf[BUFSIZE];
+};
+
+
+#define TK_NO_TOKEN (-1)
+#define TK_ERR 1
+#define TK_SPACE 2
+#define TK_EOF 3
+#define TK_OpenTag 4
+#define TK_CloseTag 5
+
+#define ret_tok( _tok ) token = (_tok); data = tokstart
+
+void Scanner::adjustAttrPointers( int distance )
+{
+    /* Slide every recorded attribute marker back by the same amount. */
+    AttrMkList::Iter mk = attrMkList;
+    while ( mk.lte() ) {
+        mk->id -= distance;
+        mk->value -= distance;
+        mk++;
+    }
+}
+
+/* There is no claim that this is a proper XML parser, but it is good
+ * enough for our purposes. */
+#line 178 "xmlscan.rl"
+
+
+int Scanner::scan( )
+{
+ int token = TK_NO_TOKEN;
+ int space = 0, readlen = 0;
+ char *attr_id_start = 0;
+ char *attr_value_start = 0;
+ int attr_id_len = 0;
+ int attr_value_len = 0;
+
+ attrMkList.empty();
+ buffer.clear();
+
+ while ( 1 ) {
+ if ( p == pe ) {
+ //printf("scanner: need more data\n");
+
+ if ( tokstart == 0 )
+ have = 0;
+ else {
+ /* There is data that needs to be shifted over. */
+ //printf("scanner: buffer broken mid token\n");
+ have = pe - tokstart;
+ memmove( buf, tokstart, have );
+
+ int distance = tokstart - buf;
+ tokend -= distance;
+ tag_id_start -= distance;
+ attr_id_start -= distance;
+ attr_value_start -= distance;
+ adjustAttrPointers( distance );
+ tokstart = buf;
+ }
+
+ p = buf + have;
+ space = BUFSIZE - have;
+
+ if ( space == 0 ) {
+ /* We filled up the buffer trying to scan a token. */
+ return TK_SPACE;
+ }
+
+ if ( done ) {
+ //printf("scanner: end of file\n");
+ p[0] = 0;
+ readlen = 1;
+ }
+ else {
+ input.read( p, space );
+ readlen = input.gcount();
+ if ( input.eof() ) {
+ //printf("scanner: setting done flag\n");
+ done = 1;
+ }
+ }
+
+ pe = p + readlen;
+ }
+
+
+#line 188 "xmlscan.cpp"
+ {
+ if ( p == pe )
+ goto _out;
+ switch ( cs )
+ {
+tr6:
+#line 115 "xmlscan.rl"
+ { curcol++; }
+#line 168 "xmlscan.rl"
+ {tokend = p+1;{ buffer.append( '&' ); }{p = ((tokend))-1;}}
+ goto st20;
+tr8:
+#line 115 "xmlscan.rl"
+ { curcol++; }
+#line 172 "xmlscan.rl"
+ {tokend = p+1;{ buffer.append( '>' ); }{p = ((tokend))-1;}}
+ goto st20;
+tr10:
+#line 115 "xmlscan.rl"
+ { curcol++; }
+#line 170 "xmlscan.rl"
+ {tokend = p+1;{ buffer.append( '<' ); }{p = ((tokend))-1;}}
+ goto st20;
+tr20:
+#line 150 "xmlscan.rl"
+ { tag_id_len = p - tag_id_start; }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+#line 160 "xmlscan.rl"
+ {tokend = p+1;{ ret_tok( TK_CloseTag ); {{p = ((tokend))-1;}goto _out20;} }{p = ((tokend))-1;}}
+ goto st20;
+tr23:
+#line 115 "xmlscan.rl"
+ { curcol++; }
+#line 160 "xmlscan.rl"
+ {tokend = p+1;{ ret_tok( TK_CloseTag ); {{p = ((tokend))-1;}goto _out20;} }{p = ((tokend))-1;}}
+ goto st20;
+tr27:
+#line 150 "xmlscan.rl"
+ { tag_id_len = p - tag_id_start; }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+#line 157 "xmlscan.rl"
+ {tokend = p+1;{ ret_tok( TK_OpenTag ); {{p = ((tokend))-1;}goto _out20;} }{p = ((tokend))-1;}}
+ goto st20;
+tr30:
+#line 115 "xmlscan.rl"
+ { curcol++; }
+#line 157 "xmlscan.rl"
+ {tokend = p+1;{ ret_tok( TK_OpenTag ); {{p = ((tokend))-1;}goto _out20;} }{p = ((tokend))-1;}}
+ goto st20;
+tr46:
+#line 132 "xmlscan.rl"
+ {
+ attr_value_len = p - attr_value_start;
+
+ AttrMarker newAttr;
+ newAttr.id = attr_id_start;
+ newAttr.idLen = attr_id_len;
+ newAttr.value = attr_value_start;
+ newAttr.valueLen = attr_value_len;
+ attrMkList.append( newAttr );
+ }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+#line 157 "xmlscan.rl"
+ {tokend = p+1;{ ret_tok( TK_OpenTag ); {{p = ((tokend))-1;}goto _out20;} }{p = ((tokend))-1;}}
+ goto st20;
+tr48:
+#line 115 "xmlscan.rl"
+ { curcol++; }
+#line 164 "xmlscan.rl"
+ {tokend = p+1;{ buffer.append( *p ); }{p = ((tokend))-1;}}
+ goto st20;
+tr49:
+#line 116 "xmlscan.rl"
+ { token_col = curcol; token_line = curline; }
+#line 175 "xmlscan.rl"
+ {tokend = p+1;{ ret_tok( TK_EOF ); {{p = ((tokend))-1;}goto _out20;} }{p = ((tokend))-1;}}
+ goto st20;
+tr50:
+#line 117 "xmlscan.rl"
+ { curcol = 0; curline++; }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+#line 164 "xmlscan.rl"
+ {tokend = p+1;{ buffer.append( *p ); }{p = ((tokend))-1;}}
+ goto st20;
+st20:
+#line 1 "xmlscan.rl"
+ {tokstart = 0;}
+ if ( ++p == pe )
+ goto _out20;
+case 20:
+#line 1 "xmlscan.rl"
+ {tokstart = p;}
+#line 285 "xmlscan.cpp"
+ switch( (*p) ) {
+ case 0: goto tr49;
+ case 10: goto tr50;
+ case 38: goto tr51;
+ case 60: goto tr52;
+ }
+ goto tr48;
+tr51:
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st1;
+st1:
+ if ( ++p == pe )
+ goto _out1;
+case 1:
+#line 301 "xmlscan.cpp"
+ switch( (*p) ) {
+ case 97: goto tr0;
+ case 103: goto tr2;
+ case 108: goto tr3;
+ }
+ goto st0;
+st0:
+ goto _out0;
+tr0:
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st2;
+st2:
+ if ( ++p == pe )
+ goto _out2;
+case 2:
+#line 318 "xmlscan.cpp"
+ if ( (*p) == 109 )
+ goto tr4;
+ goto st0;
+tr4:
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st3;
+st3:
+ if ( ++p == pe )
+ goto _out3;
+case 3:
+#line 330 "xmlscan.cpp"
+ if ( (*p) == 112 )
+ goto tr5;
+ goto st0;
+tr5:
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st4;
+st4:
+ if ( ++p == pe )
+ goto _out4;
+case 4:
+#line 342 "xmlscan.cpp"
+ if ( (*p) == 59 )
+ goto tr6;
+ goto st0;
+tr2:
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st5;
+st5:
+ if ( ++p == pe )
+ goto _out5;
+case 5:
+#line 354 "xmlscan.cpp"
+ if ( (*p) == 116 )
+ goto tr7;
+ goto st0;
+tr7:
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st6;
+st6:
+ if ( ++p == pe )
+ goto _out6;
+case 6:
+#line 366 "xmlscan.cpp"
+ if ( (*p) == 59 )
+ goto tr8;
+ goto st0;
+tr3:
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st7;
+st7:
+ if ( ++p == pe )
+ goto _out7;
+case 7:
+#line 378 "xmlscan.cpp"
+ if ( (*p) == 116 )
+ goto tr9;
+ goto st0;
+tr9:
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st8;
+st8:
+ if ( ++p == pe )
+ goto _out8;
+case 8:
+#line 390 "xmlscan.cpp"
+ if ( (*p) == 59 )
+ goto tr10;
+ goto st0;
+tr11:
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st9;
+tr12:
+#line 117 "xmlscan.rl"
+ { curcol = 0; curline++; }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st9;
+tr52:
+#line 116 "xmlscan.rl"
+ { token_col = curcol; token_line = curline; }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st9;
+st9:
+ if ( ++p == pe )
+ goto _out9;
+case 9:
+#line 414 "xmlscan.cpp"
+ switch( (*p) ) {
+ case 9: goto tr11;
+ case 10: goto tr12;
+ case 13: goto tr11;
+ case 32: goto tr11;
+ case 47: goto tr13;
+ case 95: goto tr14;
+ }
+ if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr14;
+ } else if ( (*p) >= 65 )
+ goto tr14;
+ goto st0;
+tr13:
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st10;
+tr15:
+#line 117 "xmlscan.rl"
+ { curcol = 0; curline++; }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st10;
+st10:
+ if ( ++p == pe )
+ goto _out10;
+case 10:
+#line 443 "xmlscan.cpp"
+ switch( (*p) ) {
+ case 9: goto tr13;
+ case 10: goto tr15;
+ case 13: goto tr13;
+ case 32: goto tr13;
+ case 95: goto tr16;
+ }
+ if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr16;
+ } else if ( (*p) >= 65 )
+ goto tr16;
+ goto st0;
+tr19:
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st11;
+tr16:
+#line 149 "xmlscan.rl"
+ { tag_id_start = p; }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st11;
+st11:
+ if ( ++p == pe )
+ goto _out11;
+case 11:
+#line 471 "xmlscan.cpp"
+ switch( (*p) ) {
+ case 9: goto tr17;
+ case 10: goto tr18;
+ case 13: goto tr17;
+ case 32: goto tr17;
+ case 62: goto tr20;
+ case 95: goto tr19;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr19;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr19;
+ } else
+ goto tr19;
+ goto st0;
+tr21:
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st12;
+tr22:
+#line 117 "xmlscan.rl"
+ { curcol = 0; curline++; }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st12;
+tr17:
+#line 150 "xmlscan.rl"
+ { tag_id_len = p - tag_id_start; }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st12;
+tr18:
+#line 150 "xmlscan.rl"
+ { tag_id_len = p - tag_id_start; }
+#line 117 "xmlscan.rl"
+ { curcol = 0; curline++; }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st12;
+st12:
+ if ( ++p == pe )
+ goto _out12;
+case 12:
+#line 517 "xmlscan.cpp"
+ switch( (*p) ) {
+ case 9: goto tr21;
+ case 10: goto tr22;
+ case 13: goto tr21;
+ case 32: goto tr21;
+ case 62: goto tr23;
+ }
+ goto st0;
+tr26:
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st13;
+tr14:
+#line 149 "xmlscan.rl"
+ { tag_id_start = p; }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st13;
+st13:
+ if ( ++p == pe )
+ goto _out13;
+case 13:
+#line 540 "xmlscan.cpp"
+ switch( (*p) ) {
+ case 9: goto tr24;
+ case 10: goto tr25;
+ case 13: goto tr24;
+ case 32: goto tr24;
+ case 62: goto tr27;
+ case 95: goto tr26;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr26;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr26;
+ } else
+ goto tr26;
+ goto st0;
+tr28:
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st14;
+tr29:
+#line 117 "xmlscan.rl"
+ { curcol = 0; curline++; }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st14;
+tr24:
+#line 150 "xmlscan.rl"
+ { tag_id_len = p - tag_id_start; }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st14;
+tr25:
+#line 150 "xmlscan.rl"
+ { tag_id_len = p - tag_id_start; }
+#line 117 "xmlscan.rl"
+ { curcol = 0; curline++; }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st14;
+tr44:
+#line 132 "xmlscan.rl"
+ {
+ attr_value_len = p - attr_value_start;
+
+ AttrMarker newAttr;
+ newAttr.id = attr_id_start;
+ newAttr.idLen = attr_id_len;
+ newAttr.value = attr_value_start;
+ newAttr.valueLen = attr_value_len;
+ attrMkList.append( newAttr );
+ }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st14;
+tr45:
+#line 132 "xmlscan.rl"
+ {
+ attr_value_len = p - attr_value_start;
+
+ AttrMarker newAttr;
+ newAttr.id = attr_id_start;
+ newAttr.idLen = attr_id_len;
+ newAttr.value = attr_value_start;
+ newAttr.valueLen = attr_value_len;
+ attrMkList.append( newAttr );
+ }
+#line 117 "xmlscan.rl"
+ { curcol = 0; curline++; }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st14;
+st14:
+ if ( ++p == pe )
+ goto _out14;
+case 14:
+#line 618 "xmlscan.cpp"
+ switch( (*p) ) {
+ case 9: goto tr28;
+ case 10: goto tr29;
+ case 13: goto tr28;
+ case 32: goto tr28;
+ case 62: goto tr30;
+ case 95: goto tr31;
+ }
+ if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr31;
+ } else if ( (*p) >= 65 )
+ goto tr31;
+ goto st0;
+tr34:
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st15;
+tr31:
+#line 124 "xmlscan.rl"
+ { attr_id_start = p; }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st15;
+tr47:
+#line 132 "xmlscan.rl"
+ {
+ attr_value_len = p - attr_value_start;
+
+ AttrMarker newAttr;
+ newAttr.id = attr_id_start;
+ newAttr.idLen = attr_id_len;
+ newAttr.value = attr_value_start;
+ newAttr.valueLen = attr_value_len;
+ attrMkList.append( newAttr );
+ }
+#line 124 "xmlscan.rl"
+ { attr_id_start = p; }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st15;
+st15:
+ if ( ++p == pe )
+ goto _out15;
+case 15:
+#line 664 "xmlscan.cpp"
+ switch( (*p) ) {
+ case 9: goto tr32;
+ case 10: goto tr33;
+ case 13: goto tr32;
+ case 32: goto tr32;
+ case 61: goto tr35;
+ case 95: goto tr34;
+ }
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr34;
+ } else if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr34;
+ } else
+ goto tr34;
+ goto st0;
+tr36:
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st16;
+tr37:
+#line 117 "xmlscan.rl"
+ { curcol = 0; curline++; }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st16;
+tr32:
+#line 125 "xmlscan.rl"
+ { attr_id_len = p - attr_id_start; }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st16;
+tr33:
+#line 125 "xmlscan.rl"
+ { attr_id_len = p - attr_id_start; }
+#line 117 "xmlscan.rl"
+ { curcol = 0; curline++; }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st16;
+st16:
+ if ( ++p == pe )
+ goto _out16;
+case 16:
+#line 710 "xmlscan.cpp"
+ switch( (*p) ) {
+ case 9: goto tr36;
+ case 10: goto tr37;
+ case 13: goto tr36;
+ case 32: goto tr36;
+ case 61: goto tr38;
+ }
+ goto st0;
+tr38:
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st17;
+tr39:
+#line 117 "xmlscan.rl"
+ { curcol = 0; curline++; }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st17;
+tr35:
+#line 125 "xmlscan.rl"
+ { attr_id_len = p - attr_id_start; }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st17;
+st17:
+ if ( ++p == pe )
+ goto _out17;
+case 17:
+#line 739 "xmlscan.cpp"
+ switch( (*p) ) {
+ case 9: goto tr38;
+ case 10: goto tr39;
+ case 13: goto tr38;
+ case 32: goto tr38;
+ case 34: goto tr40;
+ }
+ goto st0;
+tr41:
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st18;
+tr42:
+#line 117 "xmlscan.rl"
+ { curcol = 0; curline++; }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st18;
+tr40:
+#line 130 "xmlscan.rl"
+ { attr_value_start = p; }
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st18;
+st18:
+ if ( ++p == pe )
+ goto _out18;
+case 18:
+#line 768 "xmlscan.cpp"
+ switch( (*p) ) {
+ case 10: goto tr42;
+ case 34: goto tr43;
+ }
+ goto tr41;
+tr43:
+#line 115 "xmlscan.rl"
+ { curcol++; }
+ goto st19;
+st19:
+ if ( ++p == pe )
+ goto _out19;
+case 19:
+#line 782 "xmlscan.cpp"
+ switch( (*p) ) {
+ case 9: goto tr44;
+ case 10: goto tr45;
+ case 13: goto tr44;
+ case 32: goto tr44;
+ case 62: goto tr46;
+ case 95: goto tr47;
+ }
+ if ( (*p) > 90 ) {
+ if ( 97 <= (*p) && (*p) <= 122 )
+ goto tr47;
+ } else if ( (*p) >= 65 )
+ goto tr47;
+ goto st0;
+ }
+ _out20: cs = 20; goto _out;
+ _out1: cs = 1; goto _out;
+ _out0: cs = 0; goto _out;
+ _out2: cs = 2; goto _out;
+ _out3: cs = 3; goto _out;
+ _out4: cs = 4; goto _out;
+ _out5: cs = 5; goto _out;
+ _out6: cs = 6; goto _out;
+ _out7: cs = 7; goto _out;
+ _out8: cs = 8; goto _out;
+ _out9: cs = 9; goto _out;
+ _out10: cs = 10; goto _out;
+ _out11: cs = 11; goto _out;
+ _out12: cs = 12; goto _out;
+ _out13: cs = 13; goto _out;
+ _out14: cs = 14; goto _out;
+ _out15: cs = 15; goto _out;
+ _out16: cs = 16; goto _out;
+ _out17: cs = 17; goto _out;
+ _out18: cs = 18; goto _out;
+ _out19: cs = 19; goto _out;
+
+ _out: {}
+ }
+#line 239 "xmlscan.rl"
+
+ if ( cs == Scanner_error )
+ return TK_ERR;
+
+ if ( token != TK_NO_TOKEN ) {
+ /* fbreak does not advance p, so we do it manually. */
+ p = p + 1;
+ data_len = p - data;
+ return token;
+ }
+ }
+}
+
+/* Drive the scanner over `input` and feed every resulting tag to a Parser.
+ * Returns 0 on every path that returns; the NO_TOKEN internal error and any
+ * parse error reported by Parser::token() end the program instead. */
+int xml_parse( std::istream &input, const char *fileName,
+        bool outputActive, bool wantComplete )
+{
+    Scanner scanner( fileName, input );
+    Parser parser( fileName, outputActive, wantComplete );
+
+    parser.init();
+
+    for ( ;; ) {
+        const int token = scanner.scan();
+
+        /* Non-tag results: one aborts, the other three end the parse. */
+        switch ( token ) {
+        case TK_NO_TOKEN:
+            cerr << "xmlscan: interal error: scanner returned NO_TOKEN" << endl;
+            exit(1);
+        case TK_EOF:
+            parser.token( _eof, scanner.token_col, scanner.token_line );
+            return 0;
+        case TK_ERR:
+            scanner.error() << "scanner error" << endl;
+            return 0;
+        case TK_SPACE:
+            scanner.error() << "scanner is out of buffer space" << endl;
+            return 0;
+        }
+
+        /* All other tokens are either open or close tags. */
+        const bool isOpen = ( token == TK_OpenTag );
+        XMLTagHashPair *tagId = Perfect_Hash::in_word_set(
+                scanner.tag_id_start, scanner.tag_id_len );
+        XMLTag *tag = new XMLTag( tagId, isOpen ? XMLTag::Open : XMLTag::Close );
+
+        /* Attributes and content are only collected for recognised tags. */
+        if ( tagId != 0 ) {
+            /* Get attributes for open tags. */
+            if ( isOpen && scanner.attrMkList.length() > 0 ) {
+                tag->attrList = new AttrList;
+
+                AttrMkList::Iter mk = scanner.attrMkList;
+                while ( mk.lte() ) {
+                    Attribute copied;
+
+                    /* The name is copied whole and NUL-terminated. */
+                    copied.id = new char[mk->idLen+1];
+                    memcpy( copied.id, mk->id, mk->idLen );
+                    copied.id[mk->idLen] = 0;
+
+                    /* Exclude the surrounding quotes. */
+                    const int unquoted = mk->valueLen - 2;
+                    copied.value = new char[unquoted+1];
+                    memcpy( copied.value, mk->value+1, unquoted );
+                    copied.value[unquoted] = 0;
+
+                    tag->attrList->append( copied );
+                    mk++;
+                }
+            }
+
+            /* Get content for closing tags. */
+            if ( token == TK_CloseTag ) {
+                switch ( tagId->id ) {
+                case TAG_host: case TAG_arg:
+                case TAG_t: case TAG_alphtype:
+                case TAG_text: case TAG_goto:
+                case TAG_call: case TAG_next:
+                case TAG_entry: case TAG_set_tokend:
+                case TAG_set_act: case TAG_start_state:
+                case TAG_error_state: case TAG_state_actions:
+                case TAG_action_table: case TAG_cond_space:
+                case TAG_c: case TAG_ex: {
+                    /* These tags carry text: take a NUL-terminated copy of
+                     * what the scanner accumulated in its buffer. */
+                    Buffer &text = scanner.buffer;
+                    tag->content = new char[text.length+1];
+                    memcpy( tag->content, text.data, text.length );
+                    tag->content[text.length] = 0;
+                    break;
+                }
+                }
+            }
+        }
+
+        #if 0
+        cerr << "parser_driver: " << (tag->type == XMLTag::Open ? "open" : "close") <<
+                ": " << (tag->tagId != 0 ? tag->tagId->name : "<unknown>") << endl;
+        if ( tag->attrList != 0 ) {
+            for ( AttrList::Iter attr = *tag->attrList; attr.lte(); attr++ )
+                cerr << " " << attr->id << ": " << attr->value << endl;
+        }
+        if ( tag->content != 0 )
+            cerr << " content: " << tag->content << endl;
+        #endif
+
+        parser.token( tag, scanner.token_col, scanner.token_line );
+    }
+}
+
+std::ostream &Scanner::error()
+{
+    /* "file:line:col: " at the scanner's current position; the caller
+     * streams the message text onto the returned stream. */
+    return cerr << fileName << ":" << curline << ":" << curcol << ": ";
+}
diff --git a/contrib/tools/ragel5/redfsm/xmltags.cpp b/contrib/tools/ragel5/redfsm/xmltags.cpp
new file mode 100644
index 0000000000..5fbfabab1d
--- /dev/null
+++ b/contrib/tools/ragel5/redfsm/xmltags.cpp
@@ -0,0 +1,244 @@
+/* C++ code produced by gperf version 3.0.1 */
+/* Command-line: gperf -L C++ -t xmltags.gperf */
+/* Computed positions: -k'1,3' */
+
+#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
+ && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
+ && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
+ && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
+ && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
+ && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
+ && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
+ && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
+ && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
+ && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
+ && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
+ && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
+ && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
+ && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
+ && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
+ && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
+ && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
+ && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
+ && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
+ && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
+ && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
+ && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
+ && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
+/* The character set is not based on ISO-646. */
+#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
+#endif
+
+#line 23 "xmltags.gperf"
+
+#include <string.h>
+#include "xmlparse.h"
+#line 28 "xmltags.gperf"
+struct XMLTagHashPair;
+
+#define TOTAL_KEYWORDS 55
+#define MIN_WORD_LENGTH 1
+#define MAX_WORD_LENGTH 17
+#define MIN_HASH_VALUE 5
+#define MAX_HASH_VALUE 84
+/* maximum key range = 80, duplicates = 0 */
+
+#include "phash.h"
+
+/* gperf hash over key positions 1 and 3: the word length plus the table
+ * weight of the first character and, for words of three or more characters,
+ * of the third character as well.
+ * The `register` storage class that gperf 3.0.1 emitted is dropped; it was
+ * removed in C++17 and is not part of the function's type. */
+inline unsigned int
+Perfect_Hash::hash (const char *str, unsigned int len)
+{
+ static const unsigned char asso_values[] =
+ {
+ 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
+ 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
+ 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
+ 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
+ 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
+ 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
+ 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
+ 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
+ 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
+ 85, 85, 85, 85, 85, 20, 85, 5, 41, 35,
+ 5, 35, 85, 15, 10, 0, 85, 85, 40, 0,
+ 15, 85, 40, 85, 25, 0, 10, 85, 85, 0,
+ 56, 85, 85, 85, 85, 85, 85, 85, 85, 85,
+ 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
+ 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
+ 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
+ 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
+ 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
+ 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
+ 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
+ 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
+ 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
+ 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
+ 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
+ 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
+ 85, 85, 85, 85, 85, 85
+ };
+ int hval = len;
+
+ /* Lengths 1 and 2 use only the first character; longer words add the third. */
+ switch (hval)
+ {
+ default:
+ hval += asso_values[(unsigned char)str[2]];
+ /*FALLTHROUGH*/
+ case 2:
+ case 1:
+ hval += asso_values[(unsigned char)str[0]];
+ break;
+ }
+ return hval;
+}
+
+/* Look up a tag name of `len` characters (it need not be NUL-terminated) in
+ * the gperf word list. Returns the matching entry, or 0 when the length is
+ * out of range, the hash falls outside the table, or the slot holds a
+ * different word.
+ * The `register` storage class that gperf 3.0.1 emitted is dropped; it was
+ * removed in C++17 and is not part of the function's type. */
+struct XMLTagHashPair *
+Perfect_Hash::in_word_set (const char *str, unsigned int len)
+{
+ static struct XMLTagHashPair wordlist[] =
+ {
+ {""}, {""}, {""}, {""}, {""},
+#line 74 "xmltags.gperf"
+ {"write", TAG_write},
+ {""}, {""},
+#line 68 "xmltags.gperf"
+ {"init_act", TAG_init_act},
+ {""},
+#line 34 "xmltags.gperf"
+ {"state", TAG_state},
+#line 36 "xmltags.gperf"
+ {"t", TAG_t},
+ {""},
+#line 72 "xmltags.gperf"
+ {"init_tokstart", TAG_init_tokstart},
+#line 32 "xmltags.gperf"
+ {"host", TAG_host},
+#line 33 "xmltags.gperf"
+ {"state_list", TAG_state_list},
+#line 38 "xmltags.gperf"
+ {"start_state", TAG_start_state},
+#line 69 "xmltags.gperf"
+ {"set_act", TAG_set_act},
+#line 46 "xmltags.gperf"
+ {"state_actions", TAG_state_actions},
+#line 65 "xmltags.gperf"
+ {"data", TAG_data},
+#line 71 "xmltags.gperf"
+ {"set_tokend", TAG_set_tokend},
+#line 41 "xmltags.gperf"
+ {"action", TAG_action},
+#line 73 "xmltags.gperf"
+ {"set_tokstart", TAG_set_tokstart},
+#line 78 "xmltags.gperf"
+ {"arg", TAG_arg},
+ {""},
+#line 35 "xmltags.gperf"
+ {"trans_list", TAG_trans_list},
+#line 40 "xmltags.gperf"
+ {"action_list", TAG_action_list},
+#line 43 "xmltags.gperf"
+ {"action_table", TAG_action_table},
+ {""},
+#line 49 "xmltags.gperf"
+ {"goto", TAG_goto},
+ {""},
+#line 45 "xmltags.gperf"
+ {"getkey", TAG_getkey},
+#line 42 "xmltags.gperf"
+ {"action_table_list", TAG_action_table_list},
+ {""},
+#line 52 "xmltags.gperf"
+ {"goto_expr", TAG_goto_expr},
+#line 70 "xmltags.gperf"
+ {"get_tokend", TAG_get_tokend},
+#line 82 "xmltags.gperf"
+ {"c", TAG_c},
+#line 84 "xmltags.gperf"
+ {"ex", TAG_ex},
+#line 55 "xmltags.gperf"
+ {"ret", TAG_ret},
+ {""},
+#line 63 "xmltags.gperf"
+ {"targs", TAG_targs},
+ {""},
+#line 37 "xmltags.gperf"
+ {"machine", TAG_machine},
+ {""},
+#line 57 "xmltags.gperf"
+ {"char", TAG_char},
+#line 30 "xmltags.gperf"
+ {"ragel", TAG_ragel},
+#line 76 "xmltags.gperf"
+ {"access", TAG_access},
+ {""}, {""},
+#line 31 "xmltags.gperf"
+ {"ragel_def", TAG_ragel_def},
+#line 64 "xmltags.gperf"
+ {"entry", TAG_entry},
+#line 67 "xmltags.gperf"
+ {"sub_action", TAG_sub_action},
+ {""},
+#line 44 "xmltags.gperf"
+ {"alphtype", TAG_alphtype},
+#line 58 "xmltags.gperf"
+ {"hold", TAG_hold},
+#line 56 "xmltags.gperf"
+ {"pchar", TAG_pchar},
+#line 60 "xmltags.gperf"
+ {"holdte", TAG_holdte},
+#line 47 "xmltags.gperf"
+ {"entry_points", TAG_entry_points},
+ {""},
+#line 81 "xmltags.gperf"
+ {"cond_list", TAG_cond_list},
+#line 80 "xmltags.gperf"
+ {"cond_space", TAG_cond_space},
+ {""}, {""}, {""},
+#line 62 "xmltags.gperf"
+ {"curs", TAG_curs},
+#line 79 "xmltags.gperf"
+ {"cond_space_list", TAG_cond_space_list},
+ {""}, {""},
+#line 75 "xmltags.gperf"
+ {"curstate", TAG_curstate},
+#line 66 "xmltags.gperf"
+ {"lm_switch", TAG_lm_switch},
+#line 48 "xmltags.gperf"
+ {"text", TAG_text},
+#line 39 "xmltags.gperf"
+ {"error_state", TAG_error_state},
+ {""}, {""},
+#line 59 "xmltags.gperf"
+ {"exec", TAG_exec},
+#line 51 "xmltags.gperf"
+ {"next", TAG_next},
+#line 61 "xmltags.gperf"
+ {"execte", TAG_execte},
+ {""}, {""},
+#line 50 "xmltags.gperf"
+ {"call", TAG_call},
+#line 54 "xmltags.gperf"
+ {"next_expr", TAG_next_expr},
+#line 77 "xmltags.gperf"
+ {"break", TAG_break},
+#line 83 "xmltags.gperf"
+ {"exports", TAG_exports},
+ {""},
+#line 53 "xmltags.gperf"
+ {"call_expr", TAG_call_expr}
+ };
+
+ if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+ {
+ int key = hash (str, len);
+
+ if (key <= MAX_HASH_VALUE && key >= 0)
+ {
+ const char *s = wordlist[key].name;
+
+ /* Compare exactly len characters, then require the table word to end there too. */
+ if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
+ return &wordlist[key];
+ }
+ }
+ return 0;
+}
diff --git a/contrib/tools/ragel5/redfsm/ya.make b/contrib/tools/ragel5/redfsm/ya.make
new file mode 100644
index 0000000000..8bb2b97d44
--- /dev/null
+++ b/contrib/tools/ragel5/redfsm/ya.make
@@ -0,0 +1,25 @@
+LIBRARY()
+
+LICENSE(GPL-2.0-or-later)
+
+NO_UTIL()
+NO_COMPILER_WARNINGS()
+
+ADDINCL(
+ GLOBAL contrib/tools/ragel5/redfsm
+)
+
+PEERDIR(
+ contrib/tools/ragel5/aapl
+ contrib/tools/ragel5/common
+)
+
+SRCS(
+ gendata.cpp
+ redfsm.cpp
+ xmlparse.cpp
+ xmlscan.cpp
+ xmltags.cpp
+)
+
+END()
diff --git a/contrib/tools/ragel5/rlgen-cd/fflatcodegen.cpp b/contrib/tools/ragel5/rlgen-cd/fflatcodegen.cpp
new file mode 100644
index 0000000000..813347fd2b
--- /dev/null
+++ b/contrib/tools/ragel5/rlgen-cd/fflatcodegen.cpp
@@ -0,0 +1,351 @@
+/*
+ * Copyright 2004-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Erich Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "rlgen-cd.h"
+#include "fflatcodegen.h"
+#include "redfsm.h"
+#include "gendata.h"
+
+std::ostream &FFlatCodeGen::TO_STATE_ACTION( RedStateAp *state )
+{
+ int act = 0; /* Zero is written when the state has no to-state action. */
+ if ( state->toStateAction != 0 )
+ act = state->toStateAction->actListId+1; /* Action list id shifted up by one; zero is the no-action value. */
+ out << act;
+ return out;
+}
+
+std::ostream &FFlatCodeGen::FROM_STATE_ACTION( RedStateAp *state )
+{
+ int act = 0; /* Zero is written when the state has no from-state action. */
+ if ( state->fromStateAction != 0 )
+ act = state->fromStateAction->actListId+1; /* Action list id shifted up by one; zero is the no-action value. */
+ out << act;
+ return out;
+}
+
+std::ostream &FFlatCodeGen::EOF_ACTION( RedStateAp *state )
+{
+ int act = 0; /* Zero is written when the state has no EOF action. */
+ if ( state->eofAction != 0 )
+ act = state->eofAction->actListId+1; /* Action list id shifted up by one; zero is the no-action value. */
+ out << act;
+ return out;
+}
+
+/* Write the action table entry for a transition: its action list id plus one, or zero if it has no action. */
+std::ostream &FFlatCodeGen::TRANS_ACTION( RedTransAp *trans )
+{
+ int action = 0; /* Zero is written when the transition has no action. */
+ if ( trans->action != 0 )
+ action = trans->action->actListId+1; /* Shifted up by one; zero is the no-action value. */
+ out << action;
+ return out;
+}
+
+/* Write the cases of the to-state action switch. Each case label is an
+ * action list id plus one, the same value TO_STATE_ACTION() writes. */
+std::ostream &FFlatCodeGen::TO_STATE_ACTION_SWITCH()
+{
+ /* Loop over every action list in the action map. */
+ for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) {
+ if ( redAct->numToStateRefs > 0 ) { /* Skip lists with no to-state references. */
+ /* Write the case label. */
+ out << "\tcase " << redAct->actListId+1 << ":\n";
+
+ /* Write each action in the list of action items. */
+ for ( ActionTable::Iter item = redAct->key; item.lte(); item++ )
+ ACTION( out, item->value, 0, false );
+
+ out << "\tbreak;\n";
+ }
+ }
+
+ genLineDirective( out );
+ return out;
+}
+
+/* Write the cases of the from-state action switch. Each case label is an
+ * action list id plus one, the same value FROM_STATE_ACTION() writes. */
+std::ostream &FFlatCodeGen::FROM_STATE_ACTION_SWITCH()
+{
+ /* Loop over every action list in the action map. */
+ for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) {
+ if ( redAct->numFromStateRefs > 0 ) { /* Skip lists with no from-state references. */
+ /* Write the case label. */
+ out << "\tcase " << redAct->actListId+1 << ":\n";
+
+ /* Write each action in the list of action items. */
+ for ( ActionTable::Iter item = redAct->key; item.lte(); item++ )
+ ACTION( out, item->value, 0, false );
+
+ out << "\tbreak;\n";
+ }
+ }
+
+ genLineDirective( out );
+ return out;
+}
+
+std::ostream &FFlatCodeGen::EOF_ACTION_SWITCH()
+{
+ /* Loop over every action list in the action map. */
+ for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) {
+ if ( redAct->numEofRefs > 0 ) { /* Skip lists with no EOF references. */
+ /* Write the case label: the action list id plus one, as EOF_ACTION() writes it. */
+ out << "\tcase " << redAct->actListId+1 << ":\n";
+
+ /* Write each action in the list of action items. */
+ for ( ActionTable::Iter item = redAct->key; item.lte(); item++ )
+ ACTION( out, item->value, 0, true ); /* NOTE(review): last argument is true only in the EOF switch; presumably ACTION()'s inFinish flag -- confirm. */
+
+ out << "\tbreak;\n";
+ }
+ }
+
+ genLineDirective( out );
+ return out;
+}
+
+/* Write the cases of the transition action switch. Each case label is an
+ * action list id plus one, the same value TRANS_ACTION() writes. */
+std::ostream &FFlatCodeGen::ACTION_SWITCH()
+{
+ /* Loop over every action list in the action map. */
+ for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) {
+ if ( redAct->numTransRefs > 0 ) { /* Skip lists with no transition references. */
+ /* Write the case label. */
+ out << "\tcase " << redAct->actListId+1 << ":\n";
+
+ /* Write each action in the list of action items. */
+ for ( ActionTable::Iter item = redAct->key; item.lte(); item++ )
+ ACTION( out, item->value, 0, false );
+
+ out << "\tbreak;\n";
+ }
+ }
+
+ genLineDirective( out );
+ return out;
+}
+
+void FFlatCodeGen::writeData()
+{
+ if ( redFsm->anyConditions() ) {
+ OPEN_ARRAY( WIDE_ALPH_TYPE(), CK() );
+ COND_KEYS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCondSpan), CSP() );
+ COND_KEY_SPANS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCond), C() );
+ CONDS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCondIndexOffset), CO() );
+ COND_INDEX_OFFSET();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ OPEN_ARRAY( WIDE_ALPH_TYPE(), K() );
+ KEYS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxSpan), SP() );
+ KEY_SPANS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxFlatIndexOffset), IO() );
+ FLAT_INDEX_OFFSET();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxIndex), I() );
+ INDICIES();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxState), TT() );
+ TRANS_TARGS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ if ( redFsm->anyActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActListId), TA() );
+ TRANS_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ if ( redFsm->anyToStateActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), TSA() );
+ TO_STATE_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ if ( redFsm->anyFromStateActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), FSA() );
+ FROM_STATE_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ if ( redFsm->anyEofActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActListId), EA() );
+ EOF_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ STATE_IDS();
+}
+
+void FFlatCodeGen::writeExec()
+{
+ outLabelUsed = false;
+
+ out <<
+ " {\n"
+ " int _slen";
+
+ if ( redFsm->anyRegCurStateRef() )
+ out << ", _ps";
+
+ out << ";\n";
+ out << " int _trans";
+
+ if ( redFsm->anyConditions() )
+ out << ", _cond";
+
+ out << ";\n";
+
+ out <<
+ " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_keys;\n"
+ " " << PTR_CONST() << ARRAY_TYPE(redFsm->maxIndex) << POINTER() << "_inds;\n";
+
+ if ( redFsm->anyConditions() ) {
+ out <<
+ " " << PTR_CONST() << ARRAY_TYPE(redFsm->maxCond) << POINTER() << "_conds;\n"
+ " " << WIDE_ALPH_TYPE() << " _widec;\n";
+ }
+
+ if ( hasEnd ) {
+ outLabelUsed = true;
+ out <<
+ " if ( " << P() << " == " << PE() << " )\n"
+ " goto _out;\n";
+ }
+
+ out << "_resume:\n";
+
+ if ( redFsm->errState != 0 ) {
+ outLabelUsed = true;
+ out <<
+ " if ( " << CS() << " == " << redFsm->errState->id << " )\n"
+ " goto _out;\n";
+ }
+
+ if ( redFsm->anyFromStateActions() ) {
+ out <<
+ " switch ( " << FSA() << "[" << CS() << "] ) {\n";
+ FROM_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ "\n";
+ }
+
+ if ( redFsm->anyConditions() )
+ COND_TRANSLATE();
+
+ LOCATE_TRANS();
+
+ if ( redFsm->anyRegCurStateRef() )
+ out << " _ps = " << CS() << ";\n";
+
+ out <<
+ " " << CS() << " = " << TT() << "[_trans];\n\n";
+
+ if ( redFsm->anyRegActions() ) {
+ out <<
+ " if ( " << TA() << "[_trans] == 0 )\n"
+ " goto _again;\n"
+ "\n"
+ " switch ( " << TA() << "[_trans] ) {\n";
+ ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ "\n";
+ }
+
+ if ( redFsm->anyRegActions() || redFsm->anyActionGotos() ||
+ redFsm->anyActionCalls() || redFsm->anyActionRets() )
+ out << "_again:\n";
+
+ if ( redFsm->anyToStateActions() ) {
+ out <<
+ " switch ( " << TSA() << "[" << CS() << "] ) {\n";
+ TO_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ "\n";
+ }
+
+ if ( hasEnd ) {
+ out <<
+ " if ( ++" << P() << " != " << PE() << " )\n"
+ " goto _resume;\n";
+ }
+ else {
+ out <<
+ " " << P() << " += 1;\n"
+ " goto _resume;\n";
+ }
+
+ if ( outLabelUsed )
+ out << " _out: {}\n";
+
+ out << " }\n";
+}
+
+void FFlatCodeGen::writeEOF()
+{
+ if ( redFsm->anyEofActions() ) { /* Nothing is written unless the machine has EOF actions. */
+ out <<
+ " {\n"
+ " switch ( " << EA() << "[" << CS() << "] ) {\n"; /* Switch on the EOF action entry of the current state. */
+ EOF_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ "\n";
+ }
+}
diff --git a/contrib/tools/ragel5/rlgen-cd/fflatcodegen.h b/contrib/tools/ragel5/rlgen-cd/fflatcodegen.h
new file mode 100644
index 0000000000..cf92fd9baf
--- /dev/null
+++ b/contrib/tools/ragel5/rlgen-cd/fflatcodegen.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2004-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Erich Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _FFLATCODEGEN_H
+#define _FFLATCODEGEN_H
+
+#include <iostream>
+#include "flatcodegen.h"
+
+/* Forwards. */
+struct CodeGenData;
+
+/*
+ * FFlatCodeGen
+ */
+class FFlatCodeGen : public FlatCodeGen
+{
+protected:
+ FFlatCodeGen( ostream &out ) : FsmCodeGen(out), FlatCodeGen(out) {}
+
+ std::ostream &TO_STATE_ACTION_SWITCH();
+ std::ostream &FROM_STATE_ACTION_SWITCH();
+ std::ostream &EOF_ACTION_SWITCH();
+ std::ostream &ACTION_SWITCH();
+
+ virtual std::ostream &TO_STATE_ACTION( RedStateAp *state );
+ virtual std::ostream &FROM_STATE_ACTION( RedStateAp *state );
+ virtual std::ostream &EOF_ACTION( RedStateAp *state );
+ virtual std::ostream &TRANS_ACTION( RedTransAp *trans );
+
+ virtual void writeData();
+ virtual void writeEOF();
+ virtual void writeExec();
+};
+
+/*
+ * CFFlatCodeGen
+ */
+struct CFFlatCodeGen
+ : public FFlatCodeGen, public CCodeGen
+{
+ CFFlatCodeGen( ostream &out ) :
+ FsmCodeGen(out), FFlatCodeGen(out), CCodeGen(out) {}
+};
+
+/*
+ * DFFlatCodeGen
+ */
+struct DFFlatCodeGen
+ : public FFlatCodeGen, public DCodeGen
+{
+ DFFlatCodeGen( ostream &out ) :
+ FsmCodeGen(out), FFlatCodeGen(out), DCodeGen(out) {}
+};
+
+#endif /* _FFLATCODEGEN_H */
diff --git a/contrib/tools/ragel5/rlgen-cd/fgotocodegen.cpp b/contrib/tools/ragel5/rlgen-cd/fgotocodegen.cpp
new file mode 100644
index 0000000000..9c4f039f39
--- /dev/null
+++ b/contrib/tools/ragel5/rlgen-cd/fgotocodegen.cpp
@@ -0,0 +1,262 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Erich Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "rlgen-cd.h"
+#include "fgotocodegen.h"
+#include "redfsm.h"
+#include "gendata.h"
+#include "bstmap.h"
+
+std::ostream &FGotoCodeGen::EXEC_ACTIONS()
+{
+ /* Loop over every action list in the action map. */
+ for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) {
+ if ( redAct->numTransRefs > 0 ) { /* Skip lists with no transition references. */
+ /* Write the label "f<actListId>:" that starts this list's code. */
+ out << "f" << redAct->actListId << ":\n";
+
+ /* Write each action in the list of action items. */
+ for ( ActionTable::Iter item = redAct->key; item.lte(); item++ )
+ ACTION( out, item->value, 0, false );
+
+ out << "\tgoto _again;\n"; /* Every action list ends by jumping to the _again label. */
+ }
+ }
+ return out;
+}
+
+/* Write out the function switch. This switch is keyed on the values
+ * of the func index. */
+std::ostream &FGotoCodeGen::TO_STATE_ACTION_SWITCH()
+{
+ /* Loop the actions. */
+ for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) {
+ if ( redAct->numToStateRefs > 0 ) {
+ /* Write the entry label. */
+ out << "\tcase " << redAct->actListId+1 << ":\n";
+
+ /* Write each action in the list of action items. */
+ for ( ActionTable::Iter item = redAct->key; item.lte(); item++ )
+ ACTION( out, item->value, 0, false );
+
+ out << "\tbreak;\n";
+ }
+ }
+
+ genLineDirective( out );
+ return out;
+}
+
+/* Write out the function switch. This switch is keyed on the values
+ * of the func index. */
+std::ostream &FGotoCodeGen::FROM_STATE_ACTION_SWITCH()
+{
+ /* Loop the actions. */
+ for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) {
+ if ( redAct->numFromStateRefs > 0 ) {
+ /* Write the entry label. */
+ out << "\tcase " << redAct->actListId+1 << ":\n";
+
+ /* Write each action in the list of action items. */
+ for ( ActionTable::Iter item = redAct->key; item.lte(); item++ )
+ ACTION( out, item->value, 0, false );
+
+ out << "\tbreak;\n";
+ }
+ }
+
+ genLineDirective( out );
+ return out;
+}
+
+std::ostream &FGotoCodeGen::EOF_ACTION_SWITCH()
+{
+ /* Loop the actions. */
+ for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) {
+ if ( redAct->numEofRefs > 0 ) {
+ /* Write the entry label. */
+ out << "\tcase " << redAct->actListId+1 << ":\n";
+
+ /* Write each action in the list of action items. */
+ for ( ActionTable::Iter item = redAct->key; item.lte(); item++ )
+ ACTION( out, item->value, 0, true );
+
+ out << "\tbreak;\n";
+ }
+ }
+
+ genLineDirective( out );
+ return out;
+}
+
+
+std::ostream &FGotoCodeGen::FINISH_CASES()
+{
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ /* Every state that has an EOF action gets a case; no other condition is checked. */
+ if ( st->eofAction != 0 ) {
+ /* Write the case label. */
+ out << "\t\tcase " << st->id << ": ";
+
+ /* Jump to the label named after the EOF action's list id. */
+ out << "goto f" << st->eofAction->actListId << ";\n"; /* NOTE(review): EXEC_ACTIONS() writes f-labels only for lists with transition references -- confirm the target exists. */
+ }
+ }
+
+ return out;
+}
+
+unsigned int FGotoCodeGen::TO_STATE_ACTION( RedStateAp *state )
+{
+ int act = 0; /* Zero is returned when the state has no to-state action. */
+ if ( state->toStateAction != 0 )
+ act = state->toStateAction->actListId+1; /* Action list id shifted up by one; zero is the no-action value. */
+ return act;
+}
+
+unsigned int FGotoCodeGen::FROM_STATE_ACTION( RedStateAp *state )
+{
+ int act = 0; /* Zero is returned when the state has no from-state action. */
+ if ( state->fromStateAction != 0 )
+ act = state->fromStateAction->actListId+1; /* Action list id shifted up by one; zero is the no-action value. */
+ return act;
+}
+
+unsigned int FGotoCodeGen::EOF_ACTION( RedStateAp *state )
+{
+ int act = 0; /* Zero is returned when the state has no EOF action. */
+ if ( state->eofAction != 0 )
+ act = state->eofAction->actListId+1; /* Action list id shifted up by one; zero is the no-action value. */
+ return act;
+}
+
+void FGotoCodeGen::writeData()
+{
+ if ( redFsm->anyToStateActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), TSA() );
+ TO_STATE_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ if ( redFsm->anyFromStateActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), FSA() );
+ FROM_STATE_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ if ( redFsm->anyEofActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), EA() );
+ EOF_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ STATE_IDS();
+}
+
+void FGotoCodeGen::writeExec()
+{
+ outLabelUsed = false;
+
+ out << " {\n";
+
+ if ( redFsm->anyRegCurStateRef() )
+ out << " int _ps = 0;\n";
+
+ if ( redFsm->anyConditions() )
+ out << " " << WIDE_ALPH_TYPE() << " _widec;\n";
+
+ if ( hasEnd ) {
+ outLabelUsed = true;
+ out <<
+ " if ( " << P() << " == " << PE() << " )\n"
+ " goto _out;\n";
+ }
+
+ out << "_resume:\n";
+
+ if ( redFsm->anyFromStateActions() ) {
+ out <<
+ " switch ( " << FSA() << "[" << CS() << "] ) {\n";
+ FROM_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ "\n";
+ }
+
+ out <<
+ " switch ( " << CS() << " ) {\n";
+ STATE_GOTOS();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ "\n";
+ TRANSITIONS() <<
+ "\n";
+
+ if ( redFsm->anyRegActions() )
+ EXEC_ACTIONS() << "\n";
+
+ out << "_again:\n";
+
+ if ( redFsm->anyToStateActions() ) {
+ out <<
+ " switch ( " << TSA() << "[" << CS() << "] ) {\n";
+ TO_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ "\n";
+ }
+
+ if ( hasEnd ) {
+ out <<
+ " if ( ++" << P() << " != " << PE() << " )\n"
+ " goto _resume;\n";
+ }
+ else {
+ out <<
+ " " << P() << " += 1;\n"
+ " goto _resume;\n";
+ }
+
+
+ if ( outLabelUsed )
+ out << " _out: {}\n";
+
+ out << " }\n";
+}
+
+void FGotoCodeGen::writeEOF()
+{
+ if ( redFsm->anyEofActions() ) {
+ out <<
+ " {\n"
+ " switch ( " << EA() << "[" << CS() << "] ) {\n";
+ EOF_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ "\n";
+ }
+}
diff --git a/contrib/tools/ragel5/rlgen-cd/fgotocodegen.h b/contrib/tools/ragel5/rlgen-cd/fgotocodegen.h
new file mode 100644
index 0000000000..076f5c4f7f
--- /dev/null
+++ b/contrib/tools/ragel5/rlgen-cd/fgotocodegen.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Erich Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _FGOTOCODEGEN_H
+#define _FGOTOCODEGEN_H
+
+#include <iostream>
+#include "gotocodegen.h"
+
+/* Forwards. */
+struct CodeGenData;
+
+
+/*
+ * class FGotoCodeGen
+ */
+class FGotoCodeGen : public GotoCodeGen
+{
+public:
+ FGotoCodeGen( ostream &out ) : FsmCodeGen(out), GotoCodeGen(out) {}
+
+ std::ostream &EXEC_ACTIONS();
+ std::ostream &TO_STATE_ACTION_SWITCH();
+ std::ostream &FROM_STATE_ACTION_SWITCH();
+ std::ostream &FINISH_CASES();
+ std::ostream &EOF_ACTION_SWITCH();
+ unsigned int TO_STATE_ACTION( RedStateAp *state );
+ unsigned int FROM_STATE_ACTION( RedStateAp *state );
+ unsigned int EOF_ACTION( RedStateAp *state );
+
+ virtual void writeData();
+ virtual void writeEOF();
+ virtual void writeExec();
+};
+
+/*
+ * class CFGotoCodeGen
+ */
+struct CFGotoCodeGen
+ : public FGotoCodeGen, public CCodeGen
+{
+ CFGotoCodeGen( ostream &out ) :
+ FsmCodeGen(out), FGotoCodeGen(out), CCodeGen(out) {}
+};
+
+/*
+ * class DFGotoCodeGen
+ */
+struct DFGotoCodeGen
+ : public FGotoCodeGen, public DCodeGen
+{
+ DFGotoCodeGen( ostream &out ) :
+ FsmCodeGen(out), FGotoCodeGen(out), DCodeGen(out) {}
+};
+
+#endif /* _FGOTOCODEGEN_H */
diff --git a/contrib/tools/ragel5/rlgen-cd/flatcodegen.cpp b/contrib/tools/ragel5/rlgen-cd/flatcodegen.cpp
new file mode 100644
index 0000000000..117f3798c9
--- /dev/null
+++ b/contrib/tools/ragel5/rlgen-cd/flatcodegen.cpp
@@ -0,0 +1,766 @@
+/*
+ * Copyright 2004-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Erich Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "rlgen-cd.h"
+#include "flatcodegen.h"
+#include "redfsm.h"
+#include "gendata.h"
+
+std::ostream &FlatCodeGen::TO_STATE_ACTION( RedStateAp *state )
+{
+ int act = 0; /* Zero is written when the state has no to-state action. */
+ if ( state->toStateAction != 0 )
+ act = state->toStateAction->location+1; /* The list's location field shifted up by one; zero is the no-action value. */
+ out << act;
+ return out;
+}
+
+std::ostream &FlatCodeGen::FROM_STATE_ACTION( RedStateAp *state )
+{
+ int act = 0; /* Zero is written when the state has no from-state action. */
+ if ( state->fromStateAction != 0 )
+ act = state->fromStateAction->location+1; /* The list's location field shifted up by one; zero is the no-action value. */
+ out << act;
+ return out;
+}
+
+std::ostream &FlatCodeGen::EOF_ACTION( RedStateAp *state )
+{
+ int act = 0; /* Zero is written when the state has no EOF action. */
+ if ( state->eofAction != 0 )
+ act = state->eofAction->location+1; /* The list's location field shifted up by one; zero is the no-action value. */
+ out << act;
+ return out;
+}
+
+std::ostream &FlatCodeGen::TRANS_ACTION( RedTransAp *trans )
+{
+ /* Write the transition's action entry: the action list's location plus one, or zero if there is no action. */
+ int act = 0;
+ if ( trans->action != 0 )
+ act = trans->action->location+1; /* Shifted up by one; zero is the no-action value. */
+ out << act;
+ return out;
+}
+
+std::ostream &FlatCodeGen::TO_STATE_ACTION_SWITCH()
+{
+ /* Walk the list of functions, printing the cases. */
+ for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
+ /* Write out referenced actions. */
+ if ( act->numToStateRefs > 0 ) {
+ /* Write the case label, the action and the case break */
+ out << "\tcase " << act->actionId << ":\n";
+ ACTION( out, act, 0, false );
+ out << "\tbreak;\n";
+ }
+ }
+
+ genLineDirective( out );
+ return out;
+}
+
+std::ostream &FlatCodeGen::FROM_STATE_ACTION_SWITCH()
+{
+ /* Walk the list of functions, printing the cases. */
+ for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
+ /* Write out referenced actions. */
+ if ( act->numFromStateRefs > 0 ) {
+ /* Write the case label, the action and the case break */
+ out << "\tcase " << act->actionId << ":\n";
+ ACTION( out, act, 0, false );
+ out << "\tbreak;\n";
+ }
+ }
+
+ genLineDirective( out );
+ return out;
+}
+
+std::ostream &FlatCodeGen::EOF_ACTION_SWITCH()
+{
+ /* Walk the list of functions, printing the cases. */
+ for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
+ /* Write out referenced actions. */
+ if ( act->numEofRefs > 0 ) {
+ /* Write the case label, the action and the case break */
+ out << "\tcase " << act->actionId << ":\n";
+ ACTION( out, act, 0, true );
+ out << "\tbreak;\n";
+ }
+ }
+
+ genLineDirective( out );
+ return out;
+}
+
+
+std::ostream &FlatCodeGen::ACTION_SWITCH()
+{
+ /* Walk the list of functions, printing the cases. */
+ for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
+ /* Write out referenced actions. */
+ if ( act->numTransRefs > 0 ) {
+ /* Write the case label, the action and the case break */
+ out << "\tcase " << act->actionId << ":\n";
+ ACTION( out, act, 0, false );
+ out << "\tbreak;\n";
+ }
+ }
+
+ genLineDirective( out );
+ return out;
+}
+
+
+std::ostream &FlatCodeGen::FLAT_INDEX_OFFSET()
+{
+ out << "\t";
+ int totalStateNum = 0, curIndOffset = 0;
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ /* Write the index offset. */
+ out << curIndOffset;
+ if ( !st.last() ) {
+ out << ", ";
+ if ( ++totalStateNum % IALL == 0 )
+ out << "\n\t";
+ }
+
+ /* Move the index offset ahead. */
+ if ( st->transList != 0 )
+ curIndOffset += keyOps->span( st->lowKey, st->highKey );
+
+ if ( st->defTrans != 0 )
+ curIndOffset += 1;
+ }
+ out << "\n";
+ return out;
+}
+
+std::ostream &FlatCodeGen::KEY_SPANS()
+{
+ out << "\t";
+ int totalStateNum = 0;
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ /* Write the state's key span (lowKey..highKey), or zero if it has no transition list. */
+ unsigned long long span = 0;
+ if ( st->transList != 0 )
+ span = keyOps->span( st->lowKey, st->highKey );
+ out << span;
+ if ( !st.last() ) { /* No separator after the final entry. */
+ out << ", ";
+ if ( ++totalStateNum % IALL == 0 ) /* Start a new line after every IALL entries. */
+ out << "\n\t";
+ }
+ }
+ out << "\n";
+ return out;
+}
+
+std::ostream &FlatCodeGen::TO_STATE_ACTIONS()
+{
+ out << "\t";
+ int totalStateNum = 0;
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ /* Write the to-state action entry for this state. */
+ TO_STATE_ACTION(st);
+ if ( !st.last() ) { /* No separator after the final entry. */
+ out << ", ";
+ if ( ++totalStateNum % IALL == 0 ) /* Start a new line after every IALL entries. */
+ out << "\n\t";
+ }
+ }
+ out << "\n";
+ return out;
+}
+
+std::ostream &FlatCodeGen::FROM_STATE_ACTIONS()
+{
+ out << "\t";
+ int totalStateNum = 0;
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ /* Write the from-state action entry for this state. */
+ FROM_STATE_ACTION(st);
+ if ( !st.last() ) { /* No separator after the final entry. */
+ out << ", ";
+ if ( ++totalStateNum % IALL == 0 ) /* Start a new line after every IALL entries. */
+ out << "\n\t";
+ }
+ }
+ out << "\n";
+ return out;
+}
+
+std::ostream &FlatCodeGen::EOF_ACTIONS()
+{
+ out << "\t";
+ int totalStateNum = 0;
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ /* Write any eof action. */
+ EOF_ACTION(st);
+ if ( !st.last() ) {
+ out << ", ";
+ if ( ++totalStateNum % IALL == 0 )
+ out << "\n\t";
+ }
+ }
+ out << "\n";
+ return out;
+}
+
+std::ostream &FlatCodeGen::COND_KEYS()
+{
+ out << '\t';
+ int totalTrans = 0;
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ /* Emit just cond low key and cond high key. */
+ out << KEY( st->condLowKey ) << ", ";
+ out << KEY( st->condHighKey ) << ", ";
+ if ( ++totalTrans % IALL == 0 )
+ out << "\n\t";
+ }
+
+ /* Output one last number so we don't have to figure out when the last
+ * entry is and avoid writing a comma. */
+ out << 0 << "\n";
+ return out;
+}
+
+std::ostream &FlatCodeGen::COND_KEY_SPANS()
+{
+ out << "\t";
+ int totalStateNum = 0;
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ /* Write the state's condition key span (condLowKey..condHighKey), or zero if it has no condition list. */
+ unsigned long long span = 0;
+ if ( st->condList != 0 )
+ span = keyOps->span( st->condLowKey, st->condHighKey );
+ out << span;
+ if ( !st.last() ) { /* No separator after the final entry. */
+ out << ", ";
+ if ( ++totalStateNum % IALL == 0 ) /* Start a new line after every IALL entries. */
+ out << "\n\t";
+ }
+ }
+ out << "\n";
+ return out;
+}
+
+std::ostream &FlatCodeGen::CONDS()
+{
+ int totalTrans = 0;
+ out << '\t';
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ if ( st->condList != 0 ) {
+ /* Walk every slot in the state's condition key span. */
+ unsigned long long span = keyOps->span( st->condLowKey, st->condHighKey );
+ for ( unsigned long long pos = 0; pos < span; pos++ ) {
+ if ( st->condList[pos] != 0 )
+ out << st->condList[pos]->condSpaceId + 1 << ", "; /* Cond space id plus one. */
+ else
+ out << "0, "; /* Zero is written for a slot with no cond space. */
+ if ( ++totalTrans % IALL == 0 )
+ out << "\n\t";
+ }
+ }
+ }
+
+ /* Output one last number so we don't have to figure out when the last
+ * entry is and avoid writing a comma. */
+ out << 0 << "\n";
+ return out;
+}
+
+std::ostream &FlatCodeGen::COND_INDEX_OFFSET()
+{
+ out << "\t";
+ int totalStateNum = 0, curIndOffset = 0;
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ /* Write the index offset. */
+ out << curIndOffset;
+ if ( !st.last() ) {
+ out << ", ";
+ if ( ++totalStateNum % IALL == 0 )
+ out << "\n\t";
+ }
+
+ /* Move the index offset ahead. */
+ if ( st->condList != 0 )
+ curIndOffset += keyOps->span( st->condLowKey, st->condHighKey );
+ }
+ out << "\n";
+ return out;
+}
+
+
+std::ostream &FlatCodeGen::KEYS()
+{
+ out << '\t';
+ int totalTrans = 0;
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ /* Emit just low key and high key. */
+ out << KEY( st->lowKey ) << ", ";
+ out << KEY( st->highKey ) << ", ";
+ if ( ++totalTrans % IALL == 0 )
+ out << "\n\t";
+ }
+
+ /* Output one last number so we don't have to figure out when the last
+ * entry is and avoid writing a comma. */
+ out << 0 << "\n";
+ return out;
+}
+
+std::ostream &FlatCodeGen::INDICIES()
+{
+ int totalTrans = 0;
+ out << '\t';
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ if ( st->transList != 0 ) {
+ /* Walk every key in the state's span, writing the id of the transition stored for it. */
+ unsigned long long span = keyOps->span( st->lowKey, st->highKey );
+ for ( unsigned long long pos = 0; pos < span; pos++ ) {
+ out << st->transList[pos]->id << ", ";
+ if ( ++totalTrans % IALL == 0 )
+ out << "\n\t";
+ }
+ }
+
+ /* The state's default index goes next. */
+ if ( st->defTrans != 0 )
+ out << st->defTrans->id << ", ";
+
+ if ( ++totalTrans % IALL == 0 ) /* NOTE(review): counted even when no default transition was written; only line-break placement depends on it. */
+ out << "\n\t";
+ }
+
+ /* Output one last number so we don't have to figure out when the last
+ * entry is and avoid writing a comma. */
+ out << 0 << "\n";
+ return out;
+}
+
+std::ostream &FlatCodeGen::TRANS_TARGS()
+{
+ /* Transitions must be written ordered by their id. */
+ RedTransAp **transPtrs = new RedTransAp*[redFsm->transSet.length()];
+ for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ )
+ transPtrs[trans->id] = trans; /* Assumes ids are unique and lie in 0..length-1 -- TODO confirm. */
+
+ /* Keep a count of the num of items in the array written. */
+ out << '\t';
+ int totalStates = 0;
+ for ( int t = 0; t < redFsm->transSet.length(); t++ ) {
+ /* Write out the target state. */
+ RedTransAp *trans = transPtrs[t];
+ out << trans->targ->id;
+ if ( t < redFsm->transSet.length()-1 ) { /* No separator after the final entry. */
+ out << ", ";
+ if ( ++totalStates % IALL == 0 ) /* Start a new line after every IALL entries. */
+ out << "\n\t";
+ }
+ }
+ out << "\n";
+ delete[] transPtrs; /* Release the temporary id-ordered pointer array. */
+ return out;
+}
+
+
+std::ostream &FlatCodeGen::TRANS_ACTIONS()
+{
+ /* Transitions must be written ordered by their id. */
+ RedTransAp **transPtrs = new RedTransAp*[redFsm->transSet.length()];
+ for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ )
+ transPtrs[trans->id] = trans;
+
+ /* Keep a count of the num of items in the array written. */
+ out << '\t';
+ int totalAct = 0;
+ for ( int t = 0; t < redFsm->transSet.length(); t++ ) {
+ /* Write the function for the transition. */
+ RedTransAp *trans = transPtrs[t];
+ TRANS_ACTION( trans );
+ if ( t < redFsm->transSet.length()-1 ) {
+ out << ", ";
+ if ( ++totalAct % IALL == 0 )
+ out << "\n\t";
+ }
+ }
+ out << "\n";
+ delete[] transPtrs;
+ return out;
+}
+
+void FlatCodeGen::LOCATE_TRANS()
+{
+ out <<
+ " _keys = " << ARR_OFF( K(), "(" + CS() + "<<1)" ) << ";\n"
+ " _inds = " << ARR_OFF( I(), IO() + "[" + CS() + "]" ) << ";\n"
+ "\n"
+ " _slen = " << SP() << "[" << CS() << "];\n"
+ " _trans = _inds[ _slen > 0 && _keys[0] <=" << GET_WIDE_KEY() << " &&\n"
+ " " << GET_WIDE_KEY() << " <= _keys[1] ?\n"
+ " " << GET_WIDE_KEY() << " - _keys[0] : _slen ];\n"
+ "\n";
+}
+
+void FlatCodeGen::GOTO( ostream &ret, int gotoDest, bool inFinish )
+{
+ ret << "{" << CS() << " = " << gotoDest << "; " <<
+ CTRL_FLOW() << "goto _again;}";
+}
+
+void FlatCodeGen::GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish )
+{
+ ret << "{" << CS() << " = (";
+ INLINE_LIST( ret, ilItem->children, 0, inFinish );
+ ret << "); " << CTRL_FLOW() << "goto _again;}";
+}
+
+void FlatCodeGen::CURS( ostream &ret, bool inFinish )
+{
+ ret << "(_ps)";
+}
+
+void FlatCodeGen::TARGS( ostream &ret, bool inFinish, int targState )
+{
+ ret << "(" << CS() << ")";
+}
+
+void FlatCodeGen::NEXT( ostream &ret, int nextDest, bool inFinish )
+{
+ ret << CS() << " = " << nextDest << ";";
+}
+
+void FlatCodeGen::NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish )
+{
+ ret << CS() << " = (";
+ INLINE_LIST( ret, ilItem->children, 0, inFinish );
+ ret << ");";
+}
+
+void FlatCodeGen::CALL( ostream &ret, int callDest, int targState, bool inFinish )
+{
+ ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; " << CS() << " = " <<
+ callDest << "; " << CTRL_FLOW() << "goto _again;}";
+}
+
+
+/* Emit a call to a computed state: push the current state onto the stack
+ * (post-incrementing top), assign the expression rendered from ilItem's
+ * children to the current state and branch to the generated _again label. */
+void FlatCodeGen::CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish )
+{
+    /* Push the return state. */
+    ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; ";
+
+    /* Assign the computed callee. */
+    ret << CS() << " = (";
+    INLINE_LIST( ret, ilItem->children, targState, inFinish );
+    ret << "); ";
+
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+
+/* Emit a return: pre-decrement top, load the current state from that stack
+ * slot and branch to the generated _again label. inFinish is unused. */
+void FlatCodeGen::RET( ostream &ret, bool inFinish )
+{
+    ret << "{";
+    ret << CS() << " = " << STACK() << "[--" << TOP() << "]; ";
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Emit a branch to the generated _out label and record that the label is
+ * referenced so that writeExec() emits it. targState is unused. */
+void FlatCodeGen::BREAK( ostream &ret, int targState )
+{
+    ret << CTRL_FLOW();
+    ret << "goto _out;";
+
+    /* writeExec() only writes "_out:" when this flag is set. */
+    outLabelUsed = true;
+}
+
+/* Write the static data tables of the flat code style, in a fixed order:
+ * the actions array, the four condition tables, the key/span/index tables,
+ * the transition target and action tables, the per-state action tables and
+ * finally the state id constants. Optional tables are only written when the
+ * corresponding redFsm->any*() query reports that they are needed. */
+void FlatCodeGen::writeData()
+{
+ /* If there are any transition functions then output the array. If there
+ * are none, don't bother emitting an empty array that won't be used. */
+ if ( redFsm->anyActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActArrItem), A() );
+ ACTIONS_ARRAY();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ /* Condition tables: keys, key spans, condition spaces and per-state
+ * offsets. Only needed when the machine uses conditions. */
+ if ( redFsm->anyConditions() ) {
+ OPEN_ARRAY( WIDE_ALPH_TYPE(), CK() );
+ COND_KEYS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCondSpan), CSP() );
+ COND_KEY_SPANS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCond), C() );
+ CONDS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCondIndexOffset), CO() );
+ COND_INDEX_OFFSET();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ /* Tables that are always written: transition keys, key spans, flat index
+ * offsets, indicies and transition targets. */
+ OPEN_ARRAY( WIDE_ALPH_TYPE(), K() );
+ KEYS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxSpan), SP() );
+ KEY_SPANS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxFlatIndexOffset), IO() );
+ FLAT_INDEX_OFFSET();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxIndex), I() );
+ INDICIES();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxState), TT() );
+ TRANS_TARGS();
+ CLOSE_ARRAY() <<
+ "\n";
+
+ /* Transition actions, stored as locations into the actions array. */
+ if ( redFsm->anyActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), TA() );
+ TRANS_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ /* Per-state to-state, from-state and EOF action tables. */
+ if ( redFsm->anyToStateActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), TSA() );
+ TO_STATE_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ if ( redFsm->anyFromStateActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), FSA() );
+ FROM_STATE_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ if ( redFsm->anyEofActions() ) {
+ OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), EA() );
+ EOF_ACTIONS();
+ CLOSE_ARRAY() <<
+ "\n";
+ }
+
+ /* Start, first-final, error and entry point constants. */
+ STATE_IDS();
+}
+
+/* Emit the code that translates the input key into the wide key _widec.
+ * The generated code starts from the plain key, looks up the condition
+ * space for the current state and key (0 when the key is outside the
+ * state's condition key span) and then, per condition space, rebases the
+ * key on the space's base key and adds one alphabet-sized offset, scaled by
+ * the condition's bit position, for every condition that evaluates true. */
+void FlatCodeGen::COND_TRANSLATE()
+{
+    out << " _widec = " << GET_KEY() << ";\n";
+
+    /* Locate the condition space for this state and key. */
+    out << " _keys = " << ARR_OFF( CK(), "(" + CS() + "<<1)" ) << ";\n";
+    out << " _conds = " << ARR_OFF( C(), CO() + "[" + CS() + "]" ) << ";\n";
+    out << "\n";
+    out << " _slen = " << CSP() << "[" << CS() << "];\n";
+    out << " _cond = _slen > 0 && _keys[0] <=" << GET_WIDE_KEY() << " &&\n";
+    out << " " << GET_WIDE_KEY() << " <= _keys[1] ?\n";
+    out << " _conds[" << GET_WIDE_KEY() << " - _keys[0]] : 0;\n";
+    out << "\n";
+
+    out << " switch ( _cond ) {\n";
+    for ( CondSpaceList::Iter spaceIt = condSpaceList; spaceIt.lte(); spaceIt++ ) {
+        CondSpace *condSpace = spaceIt;
+
+        /* Case labels are the condition space id plus one; zero is left
+         * for "no condition space". */
+        out << " case " << condSpace->condSpaceId + 1 << ": {\n";
+        out << TABS(2) << "_widec = " << CAST(WIDE_ALPH_TYPE()) << "(" <<
+                KEY(condSpace->baseKey) << " + (" << GET_KEY() <<
+                " - " << KEY(keyOps->minKey) << "));\n";
+
+        /* One test per condition in the space. */
+        for ( CondSet::Iter condIt = condSpace->condSet; condIt.lte(); condIt++ ) {
+            out << TABS(2) << "if ( ";
+            CONDITION( out, *condIt );
+            Size condValOffset = ((1 << condIt.pos()) * keyOps->alphSize());
+            out << " ) _widec += " << condValOffset << ";\n";
+        }
+
+        out << " }\n";
+        out << " break;\n";
+    }
+
+    SWITCH_DEFAULT();
+
+    out << " }\n";
+}
+
+/* Write the execution loop of the flat code style: local declarations, the
+ * _resume entry with the end-of-input and error-state checks, from-state
+ * actions, condition translation, the transition lookup, transition
+ * actions, the _again label with to-state actions, the pointer advance and
+ * finally the _out label when something referenced it. */
+void FlatCodeGen::writeExec()
+{
+ /* Set again below, and by BREAK(), whenever a "goto _out" is emitted. */
+ outLabelUsed = false;
+
+ out <<
+ " {\n"
+ " int _slen";
+
+ /* _ps is only declared when the machine needs it. */
+ if ( redFsm->anyRegCurStateRef() )
+ out << ", _ps";
+
+ out <<
+ ";\n"
+ " int _trans";
+
+ if ( redFsm->anyConditions() )
+ out << ", _cond";
+ out << ";\n";
+
+ /* The action cursor and counter are needed by any of the three action
+ * loops written below. */
+ if ( redFsm->anyToStateActions() ||
+ redFsm->anyRegActions() || redFsm->anyFromStateActions() )
+ {
+ out <<
+ " " << PTR_CONST() << ARRAY_TYPE(redFsm->maxActArrItem) << POINTER() << "_acts;\n"
+ " " << UINT() << " _nacts;\n";
+ }
+
+ out <<
+ " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_keys;\n"
+ " " << PTR_CONST() << ARRAY_TYPE(redFsm->maxIndex) << POINTER() << "_inds;\n";
+
+ if ( redFsm->anyConditions() ) {
+ out <<
+ " " << PTR_CONST() << ARRAY_TYPE(redFsm->maxCond) << POINTER() << "_conds;\n"
+ " " << WIDE_ALPH_TYPE() << " _widec;\n";
+ }
+
+ out << "\n";
+
+ /* With an end pointer, leave immediately when there is no input. */
+ if ( hasEnd ) {
+ outLabelUsed = true;
+ out <<
+ " if ( " << P() << " == " << PE() << " )\n"
+ " goto _out;\n";
+ }
+
+ out << "_resume:\n";
+
+ /* Leave the loop once the machine is in the error state. */
+ if ( redFsm->errState != 0 ) {
+ outLabelUsed = true;
+ out <<
+ " if ( " << CS() << " == " << redFsm->errState->id << " )\n"
+ " goto _out;\n";
+ }
+
+ /* From-state actions of the current state. The first element at the
+ * action location is the number of actions that follow. */
+ if ( redFsm->anyFromStateActions() ) {
+ out <<
+ " _acts = " << ARR_OFF( A(), FSA() + "[" + CS() + "]" ) << ";\n"
+ " _nacts = " << CAST(UINT()) << " *_acts++;\n"
+ " while ( _nacts-- > 0 ) {\n"
+ " switch ( *_acts++ ) {\n";
+ FROM_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ "\n";
+ }
+
+ if ( redFsm->anyConditions() )
+ COND_TRANSLATE();
+
+ LOCATE_TRANS();
+
+ /* Remember the state being left before it is overwritten. */
+ if ( redFsm->anyRegCurStateRef() )
+ out << " _ps = " << CS() << ";\n";
+
+ out <<
+ " " << CS() << " = " << TT() << "[_trans];\n"
+ "\n";
+
+ /* Transition actions. A zero action location means there is nothing to
+ * run for this transition. */
+ if ( redFsm->anyRegActions() ) {
+ out <<
+ " if ( " << TA() << "[_trans] == 0 )\n"
+ " goto _again;\n"
+ "\n"
+ " _acts = " << ARR_OFF( A(), TA() + "[_trans]" ) << ";\n"
+ " _nacts = " << CAST(UINT()) << " *_acts++;\n"
+ " while ( _nacts-- > 0 ) {\n"
+ " switch ( *(_acts++) )\n {\n";
+ ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ "\n";
+ }
+
+ /* The _again label is only written when one of these queries is true. */
+ if ( redFsm->anyRegActions() || redFsm->anyActionGotos() ||
+ redFsm->anyActionCalls() || redFsm->anyActionRets() )
+ out << "_again:\n";
+
+ /* To-state actions of the state just entered. */
+ if ( redFsm->anyToStateActions() ) {
+ out <<
+ " _acts = " << ARR_OFF( A(), TSA() + "[" + CS() + "]" ) << ";\n"
+ " _nacts = " << CAST(UINT()) << " *_acts++;\n"
+ " while ( _nacts-- > 0 ) {\n"
+ " switch ( *_acts++ ) {\n";
+ TO_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ "\n";
+ }
+
+ /* Advance the input pointer; with an end pointer stop when it is
+ * reached, otherwise loop unconditionally. */
+ if ( hasEnd ) {
+ out <<
+ " if ( ++" << P() << " != " << PE() << " )\n"
+ " goto _resume;\n";
+ }
+ else {
+ out <<
+ " " << P() << " += 1;\n"
+ " goto _resume;\n";
+ }
+
+ /* Only write the label when a "goto _out" was emitted. */
+ if ( outLabelUsed )
+ out << " _out: {}\n";
+
+ out << " }\n";
+}
+
+/* Write the block that runs the EOF actions of the current state. Nothing
+ * is written when the machine has no EOF actions. */
+void FlatCodeGen::writeEOF()
+{
+    if ( !redFsm->anyEofActions() )
+        return;
+
+    /* Declare the action cursor and counter; the first element at the
+     * action location is the number of actions that follow. */
+    out << " {\n";
+    out << " " << PTR_CONST() << ARRAY_TYPE(redFsm->maxActArrItem) << POINTER() << "_acts = " <<
+            ARR_OFF( A(), EA() + "[" + CS() + "]" ) << ";\n";
+    out << " " << UINT() << " _nacts = " << CAST(UINT()) << " *_acts++;\n";
+    out << " while ( _nacts-- > 0 ) {\n";
+    out << " switch ( *_acts++ ) {\n";
+
+    EOF_ACTION_SWITCH();
+    SWITCH_DEFAULT();
+
+    /* Close the switch, the loop and the block. */
+    out << " }\n";
+    out << " }\n";
+    out << " }\n";
+    out << "\n";
+}
diff --git a/contrib/tools/ragel5/rlgen-cd/flatcodegen.h b/contrib/tools/ragel5/rlgen-cd/flatcodegen.h
new file mode 100644
index 0000000000..27dee2ef92
--- /dev/null
+++ b/contrib/tools/ragel5/rlgen-cd/flatcodegen.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright 2004-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Erich Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _FLATCODEGEN_H
+#define _FLATCODEGEN_H
+
+#include <iostream>
+#include "fsmcodegen.h"
+
+/* Forwards. */
+struct CodeGenData;
+struct NameInst;
+struct RedTransAp;
+struct RedStateAp;
+
+/*
+ * FlatCodeGen
+ *
+ * Code generator for the flat table style. It derives virtually from
+ * FsmCodeGen so that the concrete generators below can combine it with a
+ * language mixin (CCodeGen or DCodeGen) that shares the same base.
+ */
+class FlatCodeGen : virtual public FsmCodeGen
+{
+public:
+ FlatCodeGen( ostream &out ) : FsmCodeGen(out) {}
+ virtual ~FlatCodeGen() { }
+
+protected:
+ /* Writers for the action switches and the static data tables. */
+ std::ostream &TO_STATE_ACTION_SWITCH();
+ std::ostream &FROM_STATE_ACTION_SWITCH();
+ std::ostream &EOF_ACTION_SWITCH();
+ std::ostream &ACTION_SWITCH();
+ std::ostream &KEYS();
+ std::ostream &INDICIES();
+ std::ostream &FLAT_INDEX_OFFSET();
+ std::ostream &KEY_SPANS();
+ std::ostream &TO_STATE_ACTIONS();
+ std::ostream &FROM_STATE_ACTIONS();
+ std::ostream &EOF_ACTIONS();
+ std::ostream &TRANS_TARGS();
+ std::ostream &TRANS_ACTIONS();
+ void LOCATE_TRANS();
+
+ /* Writers for the condition tables and the key translation code. */
+ std::ostream &COND_INDEX_OFFSET();
+ void COND_TRANSLATE();
+ std::ostream &CONDS();
+ std::ostream &COND_KEYS();
+ std::ostream &COND_KEY_SPANS();
+
+ /* Implementations of the control-flow hooks declared pure virtual in
+ * FsmCodeGen. */
+ void GOTO( ostream &ret, int gotoDest, bool inFinish );
+ void CALL( ostream &ret, int callDest, int targState, bool inFinish );
+ void NEXT( ostream &ret, int nextDest, bool inFinish );
+ void GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish );
+ void NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish );
+ void CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish );
+ void CURS( ostream &ret, bool inFinish );
+ void TARGS( ostream &ret, bool inFinish, int targState );
+ void RET( ostream &ret, bool inFinish );
+ void BREAK( ostream &ret, int targState );
+
+ /* Per-state and per-transition action table entries; virtual so that a
+ * derived code style can replace them. */
+ virtual std::ostream &TO_STATE_ACTION( RedStateAp *state );
+ virtual std::ostream &FROM_STATE_ACTION( RedStateAp *state );
+ virtual std::ostream &EOF_ACTION( RedStateAp *state );
+ virtual std::ostream &TRANS_ACTION( RedTransAp *trans );
+
+ /* Top-level writers for the data, EOF and exec sections. */
+ virtual void writeData();
+ virtual void writeEOF();
+ virtual void writeExec();
+};
+
+/*
+ * CFlatCodeGen
+ *
+ * Flat code style combined with the C language mixin. FsmCodeGen is a
+ * virtual base of both parents, so it is initialised here explicitly.
+ */
+struct CFlatCodeGen
+    : public FlatCodeGen, public CCodeGen
+{
+    CFlatCodeGen( ostream &out )
+        : FsmCodeGen(out), FlatCodeGen(out), CCodeGen(out)
+    {
+    }
+};
+
+/*
+ * DFlatCodeGen
+ *
+ * Flat code style combined with the D language mixin. FsmCodeGen is a
+ * virtual base of both parents, so it is initialised here explicitly.
+ */
+struct DFlatCodeGen
+    : public FlatCodeGen, public DCodeGen
+{
+    DFlatCodeGen( ostream &out )
+        : FsmCodeGen(out), FlatCodeGen(out), DCodeGen(out)
+    {
+    }
+};
+
+#endif /* _FLATCODEGEN_H */
diff --git a/contrib/tools/ragel5/rlgen-cd/fsmcodegen.cpp b/contrib/tools/ragel5/rlgen-cd/fsmcodegen.cpp
new file mode 100644
index 0000000000..c0fc4b00f5
--- /dev/null
+++ b/contrib/tools/ragel5/rlgen-cd/fsmcodegen.cpp
@@ -0,0 +1,749 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Erich Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "rlgen-cd.h"
+#include "fsmcodegen.h"
+#include "redfsm.h"
+#include "gendata.h"
+#include <sstream>
+#include <string>
+#include <assert.h>
+
+
+using std::ostream;
+using std::ostringstream;
+using std::string;
+using std::cerr;
+using std::endl;
+
+/* Write a "#line <line> "<fileName>"" directive followed by a newline.
+ * Backslashes in the file name are doubled. When noLineDirectives is set
+ * the directive is still written, but wrapped in a C comment. */
+void lineDirective( ostream &out, char *fileName, int line )
+{
+    if ( noLineDirectives )
+        out << "/* ";
+
+    /* The directive itself, with the file name in double quotes. */
+    out << "#line " << line << " \"";
+    const char *cursor = fileName;
+    while ( *cursor != 0 ) {
+        if ( *cursor == '\\' )
+            out << "\\\\";
+        else
+            out << *cursor;
+        cursor++;
+    }
+    out << '"';
+
+    if ( noLineDirectives )
+        out << " */";
+
+    out << '\n';
+}
+
+/* Write a line directive that refers to the output file itself, using the
+ * file name and the line after the current line tracked by the stream's
+ * buffer. The buffer is cast to output_filter without a check, so the
+ * stream must have been set up with one. */
+void genLineDirective( ostream &out )
+{
+    output_filter *tracker = static_cast<output_filter*>( out.rdbuf() );
+    lineDirective( out, tracker->fileName, tracker->line + 1 );
+}
+
+
+/* Construct the code generator; the output stream is handed to the
+ * CodeGenData base. */
+FsmCodeGen::FsmCodeGen( ostream &out )
+    : CodeGenData(out)
+{
+}
+
+/* Return the size field of the host type that keyOps->typeSubsumes()
+ * selects for maxVal. Asserts that such a type exists. */
+unsigned int FsmCodeGen::arrayTypeSize( unsigned long maxVal )
+{
+    long long widened = (long long) maxVal;
+    HostType *hostType = keyOps->typeSubsumes( widened );
+    assert( hostType != 0 );
+    return hostType->size;
+}
+
+/* Return the name of the host type that keyOps->typeSubsumes() selects for
+ * maxVal. The name is data1, followed by a space and data2 when the type
+ * has a second word. Asserts that such a type exists. */
+string FsmCodeGen::ARRAY_TYPE( unsigned long maxVal )
+{
+    long long widened = (long long) maxVal;
+    HostType *hostType = keyOps->typeSubsumes( widened );
+    assert( hostType != 0 );
+
+    string typeName = hostType->data1;
+    if ( hostType->data2 == 0 )
+        return typeName;
+
+    typeName += " ";
+    typeName += hostType->data2;
+    return typeName;
+}
+
+
+/* Return the machine name as a string. */
+string FsmCodeGen::FSM_NAME()
+{
+    string machineName = fsmName;
+    return machineName;
+}
+
+/* Return the id of the start state formatted as a decimal integer. The
+ * start state pointer is dereferenced without a check, so callers must
+ * only use this when the machine has a start state. */
+string FsmCodeGen::START_STATE_ID()
+{
+    ostringstream idText;
+    idText << redFsm->startState->id;
+    return idText.str();
+}
+
+/* Write the contents of the actions array. The array starts with a zero
+ * entry; after it, every action table contributes its length followed by
+ * the ids of its actions. A line break is inserted after every eighth
+ * value written. */
+std::ostream &FsmCodeGen::ACTIONS_ARRAY()
+{
+    out << "\t0, ";
+
+    /* Counts the values written so far, including the leading zero. */
+    int valuesWritten = 1;
+    for ( ActionTableMap::Iter table = redFsm->actionMap; table.lte(); table++ ) {
+        /* The length is never the last value, so it always gets a comma. */
+        out << table->key.length() << ", ";
+        if ( valuesWritten++ % 8 == 7 )
+            out << "\n\t";
+
+        for ( ActionTable::Iter entry = table->key; entry.lte(); entry++ ) {
+            out << entry->value->actionId;
+
+            /* No comma after the very last value of the array. */
+            if ( !table.last() || !entry.last() )
+                out << ", ";
+
+            if ( valuesWritten++ % 8 == 7 )
+                out << "\n\t";
+        }
+    }
+    out << "\n";
+    return out;
+}
+
+
+/* Return the expression for the current state: the user supplied
+ * expression in parentheses when there is one, otherwise the access prefix
+ * followed by "cs". */
+string FsmCodeGen::CS()
+{
+    if ( curStateExpr == 0 )
+        return ACCESS() + "cs";
+
+    ostringstream expr;
+    expr << "(";
+    INLINE_LIST( expr, curStateExpr, 0, false );
+    expr << ")";
+    return expr.str();
+}
+
+/* Return the text of the user supplied access expression, or an empty
+ * string when none was given. */
+string FsmCodeGen::ACCESS()
+{
+    if ( accessExpr == 0 )
+        return string();
+
+    ostringstream prefix;
+    INLINE_LIST( prefix, accessExpr, 0, false );
+    return prefix.str();
+}
+
+/* Return the expression for the key used in transition lookups: "_widec"
+ * when the machine has any conditions, otherwise the plain key. */
+string FsmCodeGen::GET_WIDE_KEY()
+{
+    return redFsm->anyConditions() ? string( "_widec" ) : GET_KEY();
+}
+
+/* Per-state variant: return "_widec" when the given state has a non-empty
+ * condition list, otherwise the plain key. */
+string FsmCodeGen::GET_WIDE_KEY( RedStateAp *state )
+{
+    const bool stateHasConds = state->stateCondList.length() > 0;
+    return stateHasConds ? string( "_widec" ) : GET_KEY();
+}
+
+/* Return the expression that reads the current key: the user supplied
+ * expression in parentheses when there is one, otherwise a dereference of
+ * the input pointer. */
+string FsmCodeGen::GET_KEY()
+{
+    if ( getKeyExpr == 0 )
+        return "(*" + P() + ")";
+
+    ostringstream expr;
+    expr << "(";
+    INLINE_LIST( expr, getKeyExpr, 0, false );
+    expr << ")";
+    return expr.str();
+}
+
+/* Return a string of `level` tab characters, used to indent nested
+ * generated code. A level of zero or less gives an empty string. */
+string FsmCodeGen::TABS( int level )
+{
+    if ( level <= 0 )
+        return string();
+    return string( (string::size_type) level, '\t' );
+}
+
+/* Format a key as a literal for the generated code. The value is written
+ * as an unsigned long with a 'u' suffix only when the alphabet is unsigned
+ * and the host language has explicit unsigned literals; otherwise the
+ * value is written as is. */
+string FsmCodeGen::KEY( Key key )
+{
+    ostringstream literal;
+    const bool writeUnsigned = !keyOps->isSigned && hostLang->explicitUnsigned;
+    if ( writeUnsigned )
+        literal << (unsigned long) key.getVal() << 'u';
+    else
+        literal << key.getVal();
+    return literal.str();
+}
+
+/* Emit an assignment of the expression rendered from the item's children,
+ * minus one, to the input pointer. The expression is wrapped in two pairs
+ * of parentheses for the D compiler, which would otherwise read a single
+ * word in parentheses as a C-style cast. */
+void FsmCodeGen::EXEC( ostream &ret, InlineItem *item, int targState, int inFinish )
+{
+    ret << "{";
+    ret << P() << " = ((";
+    INLINE_LIST( ret, item->children, targState, inFinish );
+    ret << "))-1;}";
+}
+
+/* Token-end version of EXEC: emit an assignment of the expression rendered
+ * from the item's children to tokend, without the minus one. The expression
+ * is wrapped in two pairs of parentheses for the D compiler, which would
+ * otherwise read a single word in parentheses as a C-style cast. */
+void FsmCodeGen::EXECTE( ostream &ret, InlineItem *item, int targState, int inFinish )
+{
+    ret << "{";
+    ret << TOKEND() << " = ((";
+    INLINE_LIST( ret, item->children, targState, inFinish );
+    ret << "));}";
+}
+
+
+/* Emit the switch on the longest-match action id. When the item handles
+ * errors, case 0 resets tokend to tokstart and jumps to the error state.
+ * Every child of the item then becomes one case holding its own inline
+ * code. A default case is always written because D code requires one. */
+void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item,
+        int targState, int inFinish )
+{
+    ret << " switch( " << ACT() << " ) {\n";
+
+    /* If the switch handles error then the error state was forced too, so
+     * errState exists here. */
+    if ( item->handlesError ) {
+        ret << " case 0: " << TOKEND() << " = " << TOKSTART() << "; ";
+        GOTO( ret, redFsm->errState->id, inFinish );
+        ret << "\n";
+    }
+
+    for ( InlineList::Iter part = *item->children; part.lte(); part++ ) {
+        /* Case label. */
+        ret << " case " << part->lmId << ":\n";
+
+        /* The action block. */
+        ret << " {";
+        INLINE_LIST( ret, part->children, targState, inFinish );
+        ret << "}\n";
+
+        /* Case break. */
+        ret << " break;\n";
+    }
+
+    ret << " default: break;\n";
+    ret << " }\n";
+    ret << "\t";
+}
+
+/* Emit an assignment of the item's longest-match id to act. */
+void FsmCodeGen::SET_ACT( ostream &ret, InlineItem *item )
+{
+    ret << ACT();
+    ret << " = " << item->lmId << ";";
+}
+
+/* Emit an assignment of the input pointer to tokend, plus the item's
+ * offset when that offset is non-zero.
+ *
+ * The whole statement is written to `ret`. The offset and the terminating
+ * semicolon used to go to the member stream `out`, which split the
+ * statement across two streams whenever the caller rendered into a
+ * different stream (INLINE_LIST is also called with local string streams).
+ */
+void FsmCodeGen::SET_TOKEND( ostream &ret, InlineItem *item )
+{
+    ret << TOKEND() << " = " << P();
+    if ( item->offset != 0 )
+        ret << "+" << item->offset;
+    ret << ";";
+}
+
+/* Emit a reference to tokend. The item is not consulted. */
+void FsmCodeGen::GET_TOKEND( ostream &ret, InlineItem *item )
+{
+    const string tokendRef = TOKEND();
+    ret << tokendRef;
+}
+
+/* Emit an assignment of the language's null item to tokstart. The item is
+ * not consulted. */
+void FsmCodeGen::INIT_TOKSTART( ostream &ret, InlineItem *item )
+{
+    ret << TOKSTART();
+    ret << " = " << NULL_ITEM() << ";";
+}
+
+/* Emit an assignment of zero to act. The item is not consulted. */
+void FsmCodeGen::INIT_ACT( ostream &ret, InlineItem *item )
+{
+    ret << ACT();
+    ret << " = 0;";
+}
+
+/* Emit an assignment of the input pointer to tokstart. The item is not
+ * consulted. */
+void FsmCodeGen::SET_TOKSTART( ostream &ret, InlineItem *item )
+{
+    ret << TOKSTART();
+    ret << " = " << P() << ";";
+}
+
+/* Emit the item's children as a brace-delimited block. Nothing is written
+ * when the item has no children. */
+void FsmCodeGen::SUB_ACTION( ostream &ret, InlineItem *item,
+        int targState, bool inFinish )
+{
+    if ( item->children->length() <= 0 )
+        return;
+
+    ret << "{";
+    INLINE_LIST( ret, item->children, targState, inFinish );
+    ret << "}";
+}
+
+
+/* Write out an inline tree structure. Walks the list and, for each item,
+ * either writes text directly or calls out to a helper; the control-flow
+ * helpers (GOTO, CALL, NEXT, RET, CURS, TARGS, BREAK and the *_EXPR forms)
+ * are virtual functions that handle code-style specific items in the tree.
+ * Item types without a case below produce no output. */
+void FsmCodeGen::INLINE_LIST( ostream &ret, InlineList *inlineList,
+ int targState, bool inFinish )
+{
+ for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
+ switch ( item->type ) {
+ /* Literal text is copied through unchanged. */
+ case InlineItem::Text:
+ ret << item->data;
+ break;
+ /* Control flow to a fixed target state. */
+ case InlineItem::Goto:
+ GOTO( ret, item->targState->id, inFinish );
+ break;
+ case InlineItem::Call:
+ CALL( ret, item->targState->id, targState, inFinish );
+ break;
+ case InlineItem::Next:
+ NEXT( ret, item->targState->id, inFinish );
+ break;
+ case InlineItem::Ret:
+ RET( ret, inFinish );
+ break;
+ /* Input pointer and current character. */
+ case InlineItem::PChar:
+ ret << P();
+ break;
+ case InlineItem::Char:
+ ret << GET_KEY();
+ break;
+ case InlineItem::Hold:
+ ret << P() << "--;";
+ break;
+ case InlineItem::Exec:
+ EXEC( ret, item, targState, inFinish );
+ break;
+ /* Token-end versions of hold and exec. */
+ case InlineItem::HoldTE:
+ ret << TOKEND() << "--;";
+ break;
+ case InlineItem::ExecTE:
+ EXECTE( ret, item, targState, inFinish );
+ break;
+ /* State references. */
+ case InlineItem::Curs:
+ CURS( ret, inFinish );
+ break;
+ case InlineItem::Targs:
+ TARGS( ret, inFinish, targState );
+ break;
+ case InlineItem::Entry:
+ ret << item->targState->id;
+ break;
+ /* Control flow to a computed target state. */
+ case InlineItem::GotoExpr:
+ GOTO_EXPR( ret, item, inFinish );
+ break;
+ case InlineItem::CallExpr:
+ CALL_EXPR( ret, item, targState, inFinish );
+ break;
+ case InlineItem::NextExpr:
+ NEXT_EXPR( ret, item, inFinish );
+ break;
+ /* Longest-match support. */
+ case InlineItem::LmSwitch:
+ LM_SWITCH( ret, item, targState, inFinish );
+ break;
+ case InlineItem::LmSetActId:
+ SET_ACT( ret, item );
+ break;
+ case InlineItem::LmSetTokEnd:
+ SET_TOKEND( ret, item );
+ break;
+ case InlineItem::LmGetTokEnd:
+ GET_TOKEND( ret, item );
+ break;
+ case InlineItem::LmInitTokStart:
+ INIT_TOKSTART( ret, item );
+ break;
+ case InlineItem::LmInitAct:
+ INIT_ACT( ret, item );
+ break;
+ case InlineItem::LmSetTokStart:
+ SET_TOKSTART( ret, item );
+ break;
+ /* Nested block and break out of the exec loop. */
+ case InlineItem::SubAction:
+ SUB_ACTION( ret, item, targState, inFinish );
+ break;
+ case InlineItem::Break:
+ BREAK( ret, targState );
+ break;
+ }
+ }
+}
+/* Return a copy of path that is safe to place in a line directive: every
+ * backslash is doubled, all other characters are copied unchanged. */
+string FsmCodeGen::LDIR_PATH( char *path )
+{
+    ostringstream escaped;
+    const char *cursor = path;
+    while ( *cursor != 0 ) {
+        if ( *cursor == '\\' )
+            escaped << "\\\\";
+        else
+            escaped << *cursor;
+        cursor++;
+    }
+    return escaped.str();
+}
+
+/* Write one user action: a line directive pointing at the action's line in
+ * the source file, then the action's inline code as a tab-indented block in
+ * braces. */
+void FsmCodeGen::ACTION( ostream &ret, Action *action, int targState, bool inFinish )
+{
+    /* Direct compiler diagnostics back to the source file. */
+    const int sourceLine = action->loc.line;
+    lineDirective( ret, sourceFileName, sourceLine );
+
+    /* The block itself. */
+    ret << "\t{";
+    INLINE_LIST( ret, action->inlineList, targState, inFinish );
+    ret << "}\n";
+}
+
+/* Write one user condition: a newline, a line directive pointing at the
+ * condition's line in the source file, then the condition's inline code
+ * without any surrounding braces. */
+void FsmCodeGen::CONDITION( ostream &ret, Action *condition )
+{
+    ret << "\n";
+
+    const int sourceLine = condition->loc.line;
+    lineDirective( ret, sourceFileName, sourceLine );
+
+    INLINE_LIST( ret, condition->inlineList, 0, false );
+}
+
+/* Return the id of the error state as text, or "-1" when the machine has
+ * no error state. */
+string FsmCodeGen::ERROR_STATE()
+{
+    ostringstream idText;
+    if ( redFsm->errState == 0 )
+        idText << "-1";
+    else
+        idText << redFsm->errState->id;
+    return idText.str();
+}
+
+/* Return the id of the first final state as text. When the machine has no
+ * first final state, redFsm->nextStateId is returned instead. */
+string FsmCodeGen::FIRST_FINAL_STATE()
+{
+    ostringstream idText;
+    if ( redFsm->firstFinState == 0 )
+        idText << redFsm->nextStateId;
+    else
+        idText << redFsm->firstFinState->id;
+    return idText.str();
+}
+
+/* Write the initialisation block: the current state is set to the start
+ * constant when there is a start state, the stack top is zeroed when the
+ * machine has call or return actions, and the longest-match variables are
+ * reset when the machine uses longest match. */
+void FsmCodeGen::writeInit()
+{
+    out << " {\n";
+
+    if ( redFsm->startState != 0 )
+        out << "\t" << CS() << " = " << START() << ";\n";
+
+    /* The stack top only needs initialising when it can be used. */
+    const bool usesStack = redFsm->anyActionCalls() || redFsm->anyActionRets();
+    if ( usesStack )
+        out << "\t" << TOP() << " = 0;\n";
+
+    if ( hasLongestMatch ) {
+        out << " " << TOKSTART() << " = " << NULL_ITEM() << ";\n";
+        out << " " << TOKEND() << " = " << NULL_ITEM() << ";\n";
+        out << " " << ACT() << " = 0;\n";
+    }
+
+    out << " }\n";
+}
+
+/* Return the prefix for generated data names: the machine name followed by
+ * an underscore when dataPrefix is set, otherwise an empty string. */
+string FsmCodeGen::DATA_PREFIX()
+{
+    return dataPrefix ? FSM_NAME() + "_" : string();
+}
+
+/* Return the name of the alphabet data type: data1 of the alphabet host
+ * type, followed by a space and data2 when the type has a second word.
+ *
+ * The append of data2 used to carry a stray unary plus
+ * ("ret += + ...data2"). It had no effect on the result, but it read like
+ * a typo and did not match ARRAY_TYPE and WIDE_ALPH_TYPE. */
+string FsmCodeGen::ALPH_TYPE()
+{
+    string ret = keyOps->alphType->data1;
+    if ( keyOps->alphType->data2 != 0 ) {
+        ret += " ";
+        ret += keyOps->alphType->data2;
+    }
+    return ret;
+}
+
+/* Return the name of the wide alphabet data type. When the machine's
+ * maximum key fits within the alphabet's maximum key this is the plain
+ * alphabet type. Otherwise it is the host type that
+ * keyOps->typeSubsumes() selects for the machine's maximum key and the
+ * alphabet's signedness; that type is asserted to exist. */
+string FsmCodeGen::WIDE_ALPH_TYPE()
+{
+    if ( redFsm->maxKey <= keyOps->maxKey )
+        return ALPH_TYPE();
+
+    long long maxKeyVal = redFsm->maxKey.getLongLong();
+    HostType *wideType = keyOps->typeSubsumes( keyOps->isSigned, maxKeyVal );
+    assert( wideType != 0 );
+
+    string typeName = wideType->data1;
+    if ( wideType->data2 != 0 ) {
+        typeName += " ";
+        typeName += wideType->data2;
+    }
+    return typeName;
+}
+
+/* Write the state id constants: the start state (when there is one), the
+ * first final state and the error state (when requested), and one constant
+ * per named entry point.
+ *
+ * Each constant is closed with "};", which pairs with the "enum {" that
+ * CCodeGen::STATIC_VAR opens.
+ * NOTE(review): DCodeGen::STATIC_VAR opens no brace, so the D output looks
+ * unbalanced here - confirm whether D generation is still expected to work. */
+void FsmCodeGen::STATE_IDS()
+{
+    if ( redFsm->startState != 0 )
+        STATIC_VAR( "int", START() ) << " = " << START_STATE_ID() << "};\n";
+
+    if ( writeFirstFinal )
+        STATIC_VAR( "int" , FIRST_FINAL() ) << " = " << FIRST_FINAL_STATE() << "};\n";
+
+    if ( writeErr )
+        STATIC_VAR( "int", ERROR() ) << " = " << ERROR_STATE() << "};\n";
+
+    out << "\n";
+
+    /* Entry points; the loop body does not run when there are none. */
+    const bool haveEntryPoints = entryPointNames.length() > 0;
+    if ( !haveEntryPoints )
+        return;
+
+    for ( EntryNameVect::Iter entry = entryPointNames; entry.lte(); entry++ ) {
+        STATIC_VAR( "int", DATA_PREFIX() + "en_" + *entry ) <<
+                " = " << entryPointIds[entry.pos()] << "};\n";
+    }
+    out << "\n";
+}
+
+
+/*
+ * Language specific, but style independent code generator functions.
+ */
+
+/* C: qualifier written in front of pointers into the data tables. */
+string CCodeGen::PTR_CONST()
+{
+    return string( "const " );
+}
+
+/* C: open a static const array definition. Two variants are written under
+ * a preprocessor conditional; the one for __GNUC__ additionally marks the
+ * array with __attribute__((used)). */
+std::ostream &CCodeGen::OPEN_ARRAY( const string& type, const string& name )
+{
+    /* Part of the declaration shared by both variants. */
+    const string declTail = "const " + type + " " + name + "[] = {\n";
+
+    out << "#if defined(__GNUC__)\n";
+    out << "static __attribute__((used)) " << declTail;
+    out << "#else\n";
+    out << "static " << declTail;
+    out << "#endif\n";
+    return out;
+}
+
+/* C: close an array definition opened by OPEN_ARRAY. */
+std::ostream &CCodeGen::CLOSE_ARRAY()
+{
+    out << "};\n";
+    return out;
+}
+
+/* C: begin a named constant as the sole enumerator of an anonymous enum.
+ * The type argument is not used; the caller writes the value and the
+ * closing brace. */
+std::ostream &CCodeGen::STATIC_VAR( const string& type, const string& name )
+{
+    return out << "enum {" << name;
+}
+
+/* C: name of the unsigned integer type. */
+string CCodeGen::UINT( )
+{
+    return string( "unsigned int" );
+}
+
+/* C: address of an array element, written as pointer arithmetic. */
+string CCodeGen::ARR_OFF( const string& ptr, const string& offset )
+{
+    string expr = ptr;
+    expr += " + ";
+    expr += offset;
+    return expr;
+}
+
+/* C: cast prefix for the given type, i.e. the type in parentheses. */
+string CCodeGen::CAST( const string& type )
+{
+    string castExpr = "(";
+    castExpr += type;
+    castExpr += ")";
+    return castExpr;
+}
+
+/* C: the null value used to reset pointer variables. */
+string CCodeGen::NULL_ITEM()
+{
+    return string( "0" );
+}
+
+/* C: pointer declarator, written between the type and the variable name. */
+string CCodeGen::POINTER()
+{
+    return string( " *" );
+}
+
+/* C: no default case is written; the stream is returned untouched so that
+ * callers can keep chaining output. */
+std::ostream &CCodeGen::SWITCH_DEFAULT()
+{
+    std::ostream &stream = out;
+    return stream;
+}
+
+/* C: nothing is written in front of generated goto statements. */
+string CCodeGen::CTRL_FLOW()
+{
+    return string();
+}
+
+/* C: write one #define per exported key, named with the data prefix and
+ * "ex_", followed by a blank line. Nothing is written when there are no
+ * exports. */
+void CCodeGen::writeExports()
+{
+    if ( exportList.length() <= 0 )
+        return;
+
+    for ( ExportList::Iter exported = exportList; exported.lte(); exported++ ) {
+        out << "#define " << DATA_PREFIX() << "ex_" << exported->name << " " <<
+                KEY(exported->key) << "\n";
+    }
+    out << "\n";
+}
+
+/*
+ * D Specific
+ */
+
+/* D: the null value used to reset pointer variables. */
+string DCodeGen::NULL_ITEM()
+{
+    return string( "null" );
+}
+
+/* D: pointer declarator. It is attached to the type rather than the name,
+ * so multiple items separated by commas can also be pointer types. */
+string DCodeGen::POINTER()
+{
+    return string( "* " );
+}
+
+/* D: no qualifier is written in front of pointers into the data tables. */
+string DCodeGen::PTR_CONST()
+{
+    return string();
+}
+
+/* D: open a static const array definition using D's "type[] name = ["
+ * syntax. */
+std::ostream &DCodeGen::OPEN_ARRAY( const string& type, const string& name )
+{
+    return out << "static const " << type << "[] " << name << " = [\n";
+}
+
+/* D: close an array definition opened by OPEN_ARRAY. */
+std::ostream &DCodeGen::CLOSE_ARRAY()
+{
+    out << "];\n";
+    return out;
+}
+
+/* D: begin a static const variable declaration; the caller writes the
+ * value and the terminator.
+ * NOTE(review): FsmCodeGen::STATE_IDS terminates with "};" to match the C
+ * "enum {" form, and no brace is opened here - confirm the D output. */
+std::ostream &DCodeGen::STATIC_VAR( const string& type, const string& name )
+{
+    return out << "static const " << type << " " << name;
+}
+
+/* D: address of an array element, written as the address of an indexing
+ * expression. */
+string DCodeGen::ARR_OFF( const string& ptr, const string& offset )
+{
+    string expr = "&";
+    expr += ptr;
+    expr += "[";
+    expr += offset;
+    expr += "]";
+    return expr;
+}
+
+/* D: cast prefix for the given type, using D's cast(type) syntax. */
+string DCodeGen::CAST( const string& type )
+{
+    string castExpr = "cast(";
+    castExpr += type;
+    castExpr += ")";
+    return castExpr;
+}
+
+/* D: name of the unsigned integer type. */
+string DCodeGen::UINT( )
+{
+    return string( "uint" );
+}
+
+/* D: write an explicit empty default case into the generated switch and
+ * return the stream so that callers can keep chaining output. */
+std::ostream &DCodeGen::SWITCH_DEFAULT()
+{
+    return out << " default: break;\n";
+}
+
+/* D: text written in front of generated goto statements, making each jump
+ * conditional on a constant true.
+ * NOTE(review): presumably this keeps the D compiler from rejecting the
+ * code that follows the goto as unreachable - confirm. */
+string DCodeGen::CTRL_FLOW()
+{
+    return string( "if (true) " );
+}
+
+/* D: write one static const of the alphabet type per exported key, named
+ * with the data prefix and "ex_", followed by a blank line. Nothing is
+ * written when there are no exports. */
+void DCodeGen::writeExports()
+{
+    if ( exportList.length() <= 0 )
+        return;
+
+    for ( ExportList::Iter exported = exportList; exported.lte(); exported++ ) {
+        out << "static const " << ALPH_TYPE() << " " << DATA_PREFIX() <<
+                "ex_" << exported->name << " = " << KEY(exported->key) << ";\n";
+    }
+    out << "\n";
+}
+
+/*
+ * End D-specific code.
+ */
+
+/* Prepare the reduced machine for code generation once its definition is
+ * complete: order the states for the selected code style, choose default
+ * and single transitions (or expand to the flat form), and then, unless
+ * errors were reported, partition, set in-transitions, analyze the machine
+ * and let the code style decide about indicies. */
+void FsmCodeGen::finishRagelDef()
+{
+ if ( codeStyle == GenGoto || codeStyle == GenFGoto ||
+ codeStyle == GenIpGoto || codeStyle == GenSplit )
+ {
+ /* For directly executable machines there is no required state
+ * ordering. Choose a depth-first ordering to increase the
+ * potential for fall-throughs. */
+ redFsm->depthFirstOrdering();
+ }
+ else {
+ /* The frontend will do this for us, but it may be a good idea to
+ * force it if the intermediate file is edited. */
+ redFsm->sortByStateId();
+ }
+
+ /* Choose default transitions and the single transition. */
+ redFsm->chooseDefaultSpan();
+
+ /* Maybe do flat expand, otherwise choose single. */
+ if ( codeStyle == GenFlat || codeStyle == GenFFlat )
+ redFsm->makeFlat();
+ else
+ redFsm->chooseSingle();
+
+ /* If any errors have occurred in the input file then don't write anything. */
+ if ( gblErrorCount > 0 )
+ return;
+
+ /* Only the split style partitions the machine. */
+ if ( codeStyle == GenSplit )
+ redFsm->partitionFsm( numSplitPartitions );
+
+ /* In-transition lists are set for the ip-goto and split styles only. */
+ if ( codeStyle == GenIpGoto || codeStyle == GenSplit )
+ redFsm->setInTrans();
+
+ /* Analyze Machine will find the final action reference counts, among
+ * other things. We will use these in reporting the usage
+ * of fsm directives in action code. */
+ analyzeMachine();
+
+ /* Determine if we should use indicies. */
+ calcIndexSize();
+}
+
+/* Start a warning message on cerr with a "file:line:col: warning: " prefix
+ * and return cerr so that the caller can append the message text. */
+ostream &FsmCodeGen::source_warning( const InputLoc &loc )
+{
+    cerr << sourceFileName << ":" << loc.line << ":" << loc.col;
+    cerr << ": warning: ";
+    return cerr;
+}
+
+/* Count an error, then start an error message on cerr with a
+ * "file:line:col: " prefix and return cerr so that the caller can append
+ * the message text. The source file name is asserted to be set. */
+ostream &FsmCodeGen::source_error( const InputLoc &loc )
+{
+    gblErrorCount += 1;
+    assert( sourceFileName != 0 );
+
+    cerr << sourceFileName << ":" << loc.line << ":" << loc.col;
+    cerr << ": ";
+    return cerr;
+}
+
diff --git a/contrib/tools/ragel5/rlgen-cd/fsmcodegen.h b/contrib/tools/ragel5/rlgen-cd/fsmcodegen.h
new file mode 100644
index 0000000000..77c76f1b1a
--- /dev/null
+++ b/contrib/tools/ragel5/rlgen-cd/fsmcodegen.h
@@ -0,0 +1,218 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Erich Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _FSMCODEGEN_H
+#define _FSMCODEGEN_H
+
+#include <iostream>
+#include <string>
+#include <stdio.h>
+#include "common.h"
+#include "gendata.h"
+
+using std::string;
+using std::ostream;
+
+/* Integer array line length. */
+#define IALL 8
+
+/* Forwards. */
+struct RedFsmAp;
+struct RedStateAp;
+struct CodeGenData;
+struct Action;
+struct NameInst;
+struct InlineItem;
+struct InlineList;
+struct RedAction;
+struct LongestMatch;
+struct LongestMatchPart;
+
+/* Format an int as a decimal string.
+ *
+ * The buffer is sized from sizeof(int): three decimal digits per byte is
+ * an upper bound on the digits needed, plus one character for the sign and
+ * one for the terminating NUL. snprintf bounds the write to the buffer,
+ * where the unchecked sprintf into a fixed 16 byte buffer relied on int
+ * being at most 32 bits wide. */
+inline std::string itoa( int i )
+{
+    char buf[sizeof(int) * 3 + 2];
+    snprintf( buf, sizeof(buf), "%i", i );
+    return buf;
+}
+
+/*
+ * class FsmCodeGen
+ *
+ * Base of the code generators. It holds the helpers that do not depend on
+ * the code style or the target language and declares, as pure virtual
+ * functions, the hooks that a code style (control flow) and a language
+ * mixin (syntax) have to provide.
+ */
+class FsmCodeGen : public CodeGenData
+{
+public:
+ FsmCodeGen( ostream &out );
+ virtual ~FsmCodeGen() {}
+
+ virtual void finishRagelDef();
+ virtual void writeInit();
+
+protected:
+ /* Style and language independent helpers. */
+ string FSM_NAME();
+ string START_STATE_ID();
+ ostream &ACTIONS_ARRAY();
+ string GET_WIDE_KEY();
+ string GET_WIDE_KEY( RedStateAp *state );
+ string TABS( int level );
+ string KEY( Key key );
+ string LDIR_PATH( char *path );
+ void ACTION( ostream &ret, Action *action, int targState, bool inFinish );
+ void CONDITION( ostream &ret, Action *condition );
+ string ALPH_TYPE();
+ string WIDE_ALPH_TYPE();
+ string ARRAY_TYPE( unsigned long maxVal );
+
+ /* Language syntax hooks, implemented by CCodeGen and DCodeGen. */
+ virtual string ARR_OFF( const string& ptr, const string& offset ) = 0;
+ virtual string CAST( const string& type ) = 0;
+ virtual string UINT() = 0;
+ virtual string NULL_ITEM() = 0;
+ virtual string POINTER() = 0;
+ virtual string GET_KEY();
+ virtual ostream &SWITCH_DEFAULT() = 0;
+
+ /* Names of the input pointer and the end pointer in generated code. */
+ string P() { return "p"; }
+ string PE() { return "pe"; }
+
+ /* Names of the machine variables, each behind the access prefix. */
+ string ACCESS();
+ string CS();
+ string STACK() { return ACCESS() + "stack"; }
+ string TOP() { return ACCESS() + "top"; }
+ string TOKSTART() { return ACCESS() + "tokstart"; }
+ string TOKEND() { return ACCESS() + "tokend"; }
+ string ACT() { return ACCESS() + "act"; }
+
+ /* Names of the generated data tables and constants. Table names start
+ * with an underscore; all names carry the data prefix. */
+ string DATA_PREFIX();
+ string PM() { return "_" + DATA_PREFIX() + "partition_map"; }
+ string C() { return "_" + DATA_PREFIX() + "cond_spaces"; }
+ string CK() { return "_" + DATA_PREFIX() + "cond_keys"; }
+ string K() { return "_" + DATA_PREFIX() + "trans_keys"; }
+ string I() { return "_" + DATA_PREFIX() + "indicies"; }
+ string CO() { return "_" + DATA_PREFIX() + "cond_offsets"; }
+ string KO() { return "_" + DATA_PREFIX() + "key_offsets"; }
+ string IO() { return "_" + DATA_PREFIX() + "index_offsets"; }
+ string CL() { return "_" + DATA_PREFIX() + "cond_lengths"; }
+ string SL() { return "_" + DATA_PREFIX() + "single_lengths"; }
+ string RL() { return "_" + DATA_PREFIX() + "range_lengths"; }
+ string A() { return "_" + DATA_PREFIX() + "actions"; }
+ string TA() { return "_" + DATA_PREFIX() + "trans_actions_wi"; }
+ string TT() { return "_" + DATA_PREFIX() + "trans_targs_wi"; }
+ string TSA() { return "_" + DATA_PREFIX() + "to_state_actions"; }
+ string FSA() { return "_" + DATA_PREFIX() + "from_state_actions"; }
+ string EA() { return "_" + DATA_PREFIX() + "eof_actions"; }
+ string SP() { return "_" + DATA_PREFIX() + "key_spans"; }
+ string CSP() { return "_" + DATA_PREFIX() + "cond_key_spans"; }
+ string START() { return DATA_PREFIX() + "start"; }
+ string ERROR() { return DATA_PREFIX() + "error"; }
+ string FIRST_FINAL() { return DATA_PREFIX() + "first_final"; }
+ string CTXDATA() { return DATA_PREFIX() + "ctxdata"; }
+
+ /* Inline code rendering. The pure virtual control-flow hooks are
+ * implemented by the code style. */
+ void INLINE_LIST( ostream &ret, InlineList *inlineList, int targState, bool inFinish );
+ virtual void GOTO( ostream &ret, int gotoDest, bool inFinish ) = 0;
+ virtual void CALL( ostream &ret, int callDest, int targState, bool inFinish ) = 0;
+ virtual void NEXT( ostream &ret, int nextDest, bool inFinish ) = 0;
+ virtual void GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ) = 0;
+ virtual void NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ) = 0;
+ virtual void CALL_EXPR( ostream &ret, InlineItem *ilItem,
+ int targState, bool inFinish ) = 0;
+ virtual void RET( ostream &ret, bool inFinish ) = 0;
+ virtual void BREAK( ostream &ret, int targState ) = 0;
+ virtual void CURS( ostream &ret, bool inFinish ) = 0;
+ virtual void TARGS( ostream &ret, bool inFinish, int targState ) = 0;
+ /* NOTE(review): the next three take inFinish as int while every other
+ * helper takes bool - confirm whether that is intentional. */
+ void EXEC( ostream &ret, InlineItem *item, int targState, int inFinish );
+ void EXECTE( ostream &ret, InlineItem *item, int targState, int inFinish );
+ void LM_SWITCH( ostream &ret, InlineItem *item, int targState, int inFinish );
+ void SET_ACT( ostream &ret, InlineItem *item );
+ void INIT_TOKSTART( ostream &ret, InlineItem *item );
+ void INIT_ACT( ostream &ret, InlineItem *item );
+ void SET_TOKSTART( ostream &ret, InlineItem *item );
+ void SET_TOKEND( ostream &ret, InlineItem *item );
+ void GET_TOKEND( ostream &ret, InlineItem *item );
+ void SUB_ACTION( ostream &ret, InlineItem *item,
+ int targState, bool inFinish );
+ void STATE_IDS();
+
+ string ERROR_STATE();
+ string FIRST_FINAL_STATE();
+
+ /* Language syntax hooks for declarations. */
+ virtual string PTR_CONST() = 0;
+ virtual ostream &OPEN_ARRAY( const string& type, const string& name ) = 0;
+ virtual ostream &CLOSE_ARRAY() = 0;
+ virtual ostream &STATIC_VAR( const string& type, const string& name ) = 0;
+
+ /* Text written in front of generated goto statements. */
+ virtual string CTRL_FLOW() = 0;
+
+ /* Diagnostics written to cerr with a source location prefix. */
+ ostream &source_warning(const InputLoc &loc);
+ ostream &source_error(const InputLoc &loc);
+
+ unsigned int arrayTypeSize( unsigned long maxVal );
+
+ /* Bookkeeping used while writing the exec code. */
+ bool outLabelUsed;
+ bool againLabelUsed;
+ bool useIndicies;
+
+public:
+ /* Determine if we should use indicies. The default does nothing. */
+ virtual void calcIndexSize() {}
+};
+
+/* Language mixin that implements the syntax hooks of FsmCodeGen for C
+ * output. It derives virtually so that it can be combined with a code
+ * style class that shares the same FsmCodeGen base. */
+class CCodeGen : virtual public FsmCodeGen
+{
+public:
+ CCodeGen( ostream &out ) : FsmCodeGen(out) {}
+
+ /* Implementations of the syntax hooks. */
+ virtual string NULL_ITEM();
+ virtual string POINTER();
+ virtual ostream &SWITCH_DEFAULT();
+ virtual ostream &OPEN_ARRAY( const string& type, const string& name );
+ virtual ostream &CLOSE_ARRAY();
+ virtual ostream &STATIC_VAR( const string& type, const string& name );
+ virtual string ARR_OFF( const string& ptr, const string& offset );
+ virtual string CAST( const string& type );
+ virtual string UINT();
+ virtual string PTR_CONST();
+ virtual string CTRL_FLOW();
+
+ /* Writes the exported keys as #define lines. */
+ virtual void writeExports();
+};
+
+/* Language mixin that implements the syntax hooks of FsmCodeGen for D
+ * output. It derives virtually so that it can be combined with a code
+ * style class that shares the same FsmCodeGen base. */
+class DCodeGen : virtual public FsmCodeGen
+{
+public:
+ DCodeGen( ostream &out ) : FsmCodeGen(out) {}
+
+ /* Implementations of the syntax hooks. */
+ virtual string NULL_ITEM();
+ virtual string POINTER();
+ virtual ostream &SWITCH_DEFAULT();
+ virtual ostream &OPEN_ARRAY( const string& type, const string& name );
+ virtual ostream &CLOSE_ARRAY();
+ virtual ostream &STATIC_VAR( const string& type, const string& name );
+ virtual string ARR_OFF( const string& ptr, const string& offset );
+ virtual string CAST( const string& type );
+ virtual string UINT();
+ virtual string PTR_CONST();
+ virtual string CTRL_FLOW();
+
+ /* Writes the exported keys as static const declarations. */
+ virtual void writeExports();
+};
+
+#endif /* _FSMCODEGEN_H */
diff --git a/contrib/tools/ragel5/rlgen-cd/ftabcodegen.cpp b/contrib/tools/ragel5/rlgen-cd/ftabcodegen.cpp
new file mode 100644
index 0000000000..1d65e7102c
--- /dev/null
+++ b/contrib/tools/ragel5/rlgen-cd/ftabcodegen.cpp
@@ -0,0 +1,405 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Erich Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "rlgen-cd.h"
+#include "ftabcodegen.h"
+#include "redfsm.h"
+#include "gendata.h"
+
+/* Decide whether the transition tables are reached through an index array.
+ * The table size is estimated for both layouts and useIndicies is set when
+ * the indexed layout comes out strictly smaller. */
+void FTabCodeGen::calcIndexSize()
+{
+    int indexedCost = 0;
+    int directCost = 0;
+
+    /* Indexed layout: one index entry per outgoing transition of every
+     * state, plus target (and action) entries for each unique transition. */
+    for ( RedStateList::Iter state = redFsm->stateList; state.lte(); state++ ) {
+        int numTrans = state->outSingle.length() + state->outRange.length();
+        if ( state->defTrans != 0 )
+            numTrans += 1;
+        indexedCost += arrayTypeSize(redFsm->maxIndex) * numTrans;
+    }
+    indexedCost += arrayTypeSize(redFsm->maxState) * redFsm->transSet.length();
+    if ( redFsm->anyActions() )
+        indexedCost += arrayTypeSize(redFsm->maxActListId) * redFsm->transSet.length();
+
+    /* Direct layout: target (and action) entries repeated for every
+     * outgoing transition of every state. */
+    for ( RedStateList::Iter state = redFsm->stateList; state.lte(); state++ ) {
+        int numTrans = state->outSingle.length() + state->outRange.length();
+        if ( state->defTrans != 0 )
+            numTrans += 1;
+        directCost += arrayTypeSize(redFsm->maxState) * numTrans;
+        if ( redFsm->anyActions() )
+            directCost += arrayTypeSize(redFsm->maxActListId) * numTrans;
+    }
+
+    /* Only a strict saving switches the index array on. */
+    useIndicies = indexedCost < directCost;
+}
+
+/* Write the to-state action entry of a state: the action list id plus one,
+ * or zero when the state has no to-state action. */
+std::ostream &FTabCodeGen::TO_STATE_ACTION( RedStateAp *state )
+{
+    const int listRef = ( state->toStateAction == 0 ) ?
+            0 : state->toStateAction->actListId+1;
+    out << listRef;
+    return out;
+}
+
+/* Write the from-state action entry of a state: the action list id plus
+ * one, or zero when the state has no from-state action. */
+std::ostream &FTabCodeGen::FROM_STATE_ACTION( RedStateAp *state )
+{
+    const int listRef = ( state->fromStateAction == 0 ) ?
+            0 : state->fromStateAction->actListId+1;
+    out << listRef;
+    return out;
+}
+
+/* Write the EOF action entry of a state: the action list id plus one, or
+ * zero when the state has no EOF action. */
+std::ostream &FTabCodeGen::EOF_ACTION( RedStateAp *state )
+{
+    const int listRef = ( state->eofAction == 0 ) ?
+            0 : state->eofAction->actListId+1;
+    out << listRef;
+    return out;
+}
+
+
+/* Write the action entry of a transition: the action list id plus one, or
+ * zero when the transition carries no action. */
+std::ostream &FTabCodeGen::TRANS_ACTION( RedTransAp *trans )
+{
+    const int listRef = ( trans->action == 0 ) ?
+            0 : trans->action->actListId+1;
+    out << listRef;
+    return out;
+}
+
+/* Write the cases of the to-state action switch. There is one case per
+ * action list referenced as a to-state action, labelled with the action
+ * list id plus one (the value TO_STATE_ACTION() writes into the table). */
+std::ostream &FTabCodeGen::TO_STATE_ACTION_SWITCH()
+{
+    for ( ActionTableMap::Iter actList = redFsm->actionMap; actList.lte(); actList++ ) {
+        /* Lists never used as a to-state action get no case. */
+        if ( actList->numToStateRefs <= 0 )
+            continue;
+
+        out << "\tcase " << actList->actListId+1 << ":\n";
+
+        /* Expand every action of the list inside the case. */
+        for ( ActionTable::Iter el = actList->key; el.lte(); el++ )
+            ACTION( out, el->value, 0, false );
+
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Write the cases of the from-state action switch. There is one case per
+ * action list referenced as a from-state action, labelled with the action
+ * list id plus one (the value FROM_STATE_ACTION() writes into the table). */
+std::ostream &FTabCodeGen::FROM_STATE_ACTION_SWITCH()
+{
+    for ( ActionTableMap::Iter actList = redFsm->actionMap; actList.lte(); actList++ ) {
+        /* Lists never used as a from-state action get no case. */
+        if ( actList->numFromStateRefs <= 0 )
+            continue;
+
+        out << "\tcase " << actList->actListId+1 << ":\n";
+
+        /* Expand every action of the list inside the case. */
+        for ( ActionTable::Iter el = actList->key; el.lte(); el++ )
+            ACTION( out, el->value, 0, false );
+
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Write the cases of the EOF action switch. There is one case per action
+ * list referenced as an EOF action, labelled with the action list id plus
+ * one (the value EOF_ACTION() writes into the table). The actions are
+ * expanded with the last ACTION() argument set to true. */
+std::ostream &FTabCodeGen::EOF_ACTION_SWITCH()
+{
+    for ( ActionTableMap::Iter actList = redFsm->actionMap; actList.lte(); actList++ ) {
+        /* Lists never used as an EOF action get no case. */
+        if ( actList->numEofRefs <= 0 )
+            continue;
+
+        out << "\tcase " << actList->actListId+1 << ":\n";
+
+        /* Expand every action of the list inside the case. */
+        for ( ActionTable::Iter el = actList->key; el.lte(); el++ )
+            ACTION( out, el->value, 0, true );
+
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Write the cases of the transition action switch. There is one case per
+ * action list referenced by a transition, labelled with the action list id
+ * plus one (the value TRANS_ACTION() writes into the table). */
+std::ostream &FTabCodeGen::ACTION_SWITCH()
+{
+    for ( ActionTableMap::Iter actList = redFsm->actionMap; actList.lte(); actList++ ) {
+        /* Lists never used on a transition get no case. */
+        if ( actList->numTransRefs <= 0 )
+            continue;
+
+        out << "\tcase " << actList->actListId+1 << ":\n";
+
+        /* Expand every action of the list inside the case. */
+        for ( ActionTable::Iter el = actList->key; el.lte(); el++ )
+            ACTION( out, el->value, 0, false );
+
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Emit the static tables of the machine, in this order: the condition
+ * tables (only when the machine has conditions), key offsets, keys, single
+ * and range lengths, index offsets, the index array (only when useIndicies
+ * is set), transition targets, transition actions, the to-state, from-state
+ * and EOF action tables (each only when such actions exist) and finally
+ * whatever STATE_IDS() writes. */
+void FTabCodeGen::writeData()
+{
+    if ( redFsm->anyConditions() ) {
+        OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCondOffset), CO() );
+        COND_OFFSETS();
+        CLOSE_ARRAY() << "\n";
+
+        OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCondLen), CL() );
+        COND_LENS();
+        CLOSE_ARRAY() << "\n";
+
+        OPEN_ARRAY( WIDE_ALPH_TYPE(), CK() );
+        COND_KEYS();
+        CLOSE_ARRAY() << "\n";
+
+        OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCondSpaceId), C() );
+        COND_SPACES();
+        CLOSE_ARRAY() << "\n";
+    }
+
+    OPEN_ARRAY( ARRAY_TYPE(redFsm->maxKeyOffset), KO() );
+    KEY_OFFSETS();
+    CLOSE_ARRAY() << "\n";
+
+    OPEN_ARRAY( WIDE_ALPH_TYPE(), K() );
+    KEYS();
+    CLOSE_ARRAY() << "\n";
+
+    OPEN_ARRAY( ARRAY_TYPE(redFsm->maxSingleLen), SL() );
+    SINGLE_LENS();
+    CLOSE_ARRAY() << "\n";
+
+    OPEN_ARRAY( ARRAY_TYPE(redFsm->maxRangeLen), RL() );
+    RANGE_LENS();
+    CLOSE_ARRAY() << "\n";
+
+    OPEN_ARRAY( ARRAY_TYPE(redFsm->maxIndexOffset), IO() );
+    INDEX_OFFSETS();
+    CLOSE_ARRAY() << "\n";
+
+    /* The index array exists only in the indexed layout. */
+    if ( useIndicies ) {
+        OPEN_ARRAY( ARRAY_TYPE(redFsm->maxIndex), I() );
+        INDICIES();
+        CLOSE_ARRAY() << "\n";
+    }
+
+    /* Targets and actions use the same array type and name in both
+     * layouts; only the routine that fills them differs. */
+    OPEN_ARRAY( ARRAY_TYPE(redFsm->maxState), TT() );
+    if ( useIndicies )
+        TRANS_TARGS_WI();
+    else
+        TRANS_TARGS();
+    CLOSE_ARRAY() << "\n";
+
+    if ( redFsm->anyActions() ) {
+        OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActListId), TA() );
+        if ( useIndicies )
+            TRANS_ACTIONS_WI();
+        else
+            TRANS_ACTIONS();
+        CLOSE_ARRAY() << "\n";
+    }
+
+    if ( redFsm->anyToStateActions() ) {
+        OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), TSA() );
+        TO_STATE_ACTIONS();
+        CLOSE_ARRAY() << "\n";
+    }
+
+    if ( redFsm->anyFromStateActions() ) {
+        OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), FSA() );
+        FROM_STATE_ACTIONS();
+        CLOSE_ARRAY() << "\n";
+    }
+
+    if ( redFsm->anyEofActions() ) {
+        OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActListId), EA() );
+        EOF_ACTIONS();
+        CLOSE_ARRAY() << "\n";
+    }
+
+    STATE_IDS();
+}
+
+/* Emit the execution block of the machine: local declarations, the _resume
+ * loop with the optional from-state action switch, the transition lookup,
+ * the transition action switch, the optional to-state action switch and the
+ * advance of the input pointer. The _out label is written only when some
+ * emitted code jumps to it, which is tracked in outLabelUsed. */
+void FTabCodeGen::writeExec()
+{
+ outLabelUsed = false;
+
+ /* Open the block and declare the locals of the emitted code. _ps is only
+ * declared when anyRegCurStateRef() reports a use. */
+ out <<
+ " {\n"
+ " int _klen";
+
+ if ( redFsm->anyRegCurStateRef() )
+ out << ", _ps";
+
+ out <<
+ ";\n"
+ " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_keys;\n"
+ " int _trans;\n";
+
+ if ( redFsm->anyConditions() )
+ out << " " << WIDE_ALPH_TYPE() << " _widec;\n";
+
+ out << "\n";
+
+ /* With an end pointer, leave immediately when there is no input. */
+ if ( hasEnd ) {
+ outLabelUsed = true;
+ out <<
+ " if ( " << P() << " == " << PE() << " )\n"
+ " goto _out;\n";
+ }
+
+ out << "_resume:\n";
+
+ /* Leave the loop once the current state is the error state. */
+ if ( redFsm->errState != 0 ) {
+ outLabelUsed = true;
+ out <<
+ " if ( " << CS() << " == " << redFsm->errState->id << " )\n"
+ " goto _out;\n";
+ }
+
+ /* From-state actions: switch on the table entry of the current state. */
+ if ( redFsm->anyFromStateActions() ) {
+ out <<
+ " switch ( " << FSA() << "[" << CS() << "] ) {\n";
+ FROM_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ "\n";
+ }
+
+ if ( redFsm->anyConditions() )
+ COND_TRANSLATE();
+
+ LOCATE_TRANS();
+
+ out << "_match:\n";
+
+ /* Save the current state in _ps before it is overwritten below. */
+ if ( redFsm->anyRegCurStateRef() )
+ out << " _ps = " << CS() << ";\n";
+
+ /* In the indexed layout _trans is first mapped through the index array. */
+ if ( useIndicies )
+ out << " _trans = " << I() << "[_trans];\n";
+
+ out <<
+ " " << CS() << " = " << TT() << "[_trans];\n"
+ "\n";
+
+ /* Transition actions: an entry of zero means no action, so the emitted
+ * code skips the switch in that case. */
+ if ( redFsm->anyRegActions() ) {
+ out <<
+ " if ( " << TA() << "[_trans] == 0 )\n"
+ " goto _again;\n"
+ "\n"
+ " switch ( " << TA() << "[_trans] ) {\n";
+ ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ "\n";
+ }
+
+ /* The _again label is written only under the conditions tested here. */
+ if ( redFsm->anyRegActions() || redFsm->anyActionGotos() ||
+ redFsm->anyActionCalls() || redFsm->anyActionRets() )
+ out << "_again:\n";
+
+ /* To-state actions: switch on the table entry of the new current state. */
+ if ( redFsm->anyToStateActions() ) {
+ out <<
+ " switch ( " << TSA() << "[" << CS() << "] ) {\n";
+ TO_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ "\n";
+ }
+
+ /* Advance the input pointer and loop; with an end pointer the loop stops
+ * when the end is reached. */
+ if ( hasEnd ) {
+ out <<
+ " if ( ++" << P() << " != " << PE() << " )\n"
+ " goto _resume;\n";
+ }
+ else {
+ out <<
+ " " << P() << " += 1;\n"
+ " goto _resume;\n";
+ }
+
+
+ if ( outLabelUsed )
+ out << " _out: {}\n";
+
+ out << " }\n";
+}
+
+
+/* Emit the EOF handling block: a switch on the EOF action table entry of the
+ * current state. Nothing is written when the machine has no EOF actions. */
+void FTabCodeGen::writeEOF()
+{
+    if ( !redFsm->anyEofActions() )
+        return;
+
+    out <<
+        " {\n"
+        " switch ( " << EA() << "[" << CS() << "] ) {\n";
+    EOF_ACTION_SWITCH();
+    SWITCH_DEFAULT() <<
+        " }\n"
+        " }\n"
+        "\n";
+}
diff --git a/contrib/tools/ragel5/rlgen-cd/ftabcodegen.h b/contrib/tools/ragel5/rlgen-cd/ftabcodegen.h
new file mode 100644
index 0000000000..9d26d1cadd
--- /dev/null
+++ b/contrib/tools/ragel5/rlgen-cd/ftabcodegen.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Erich Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _FTABCODEGEN_H
+#define _FTABCODEGEN_H
+
+#include <iostream>
+#include "tabcodegen.h"
+
+/* Forwards. */
+struct CodeGenData;
+
+
+/*
+ * FTabCodeGen
+ */
+class FTabCodeGen : public TabCodeGen
+{
+protected:
+ /* Protected: instances are created through derived classes. FsmCodeGen is
+ * initialised here as well as TabCodeGen. */
+ FTabCodeGen( ostream &out ) : FsmCodeGen(out), TabCodeGen(out) {}
+
+ /* Case lists for the emitted action switches. */
+ std::ostream &TO_STATE_ACTION_SWITCH();
+ std::ostream &FROM_STATE_ACTION_SWITCH();
+ std::ostream &EOF_ACTION_SWITCH();
+ std::ostream &ACTION_SWITCH();
+
+ /* Single table entries for a state or a transition. */
+ virtual std::ostream &TO_STATE_ACTION( RedStateAp *state );
+ virtual std::ostream &FROM_STATE_ACTION( RedStateAp *state );
+ virtual std::ostream &EOF_ACTION( RedStateAp *state );
+ virtual std::ostream &TRANS_ACTION( RedTransAp *trans );
+ /* Top level emitters for the data tables, the EOF block and the
+ * execution block. */
+ virtual void writeData();
+ virtual void writeEOF();
+ virtual void writeExec();
+ /* Sets useIndicies. */
+ virtual void calcIndexSize();
+};
+
+
+/*
+ * CFTabCodeGen
+ *
+ * Concrete generator combining FTabCodeGen with CCodeGen. The constructor
+ * initialises FsmCodeGen itself in addition to both direct bases.
+ */
+struct CFTabCodeGen
+ : public FTabCodeGen, public CCodeGen
+{
+ CFTabCodeGen( ostream &out ) :
+ FsmCodeGen(out), FTabCodeGen(out), CCodeGen(out) {}
+};
+
+/*
+ * DFTabCodeGen
+ *
+ * Concrete generator combining FTabCodeGen with DCodeGen. The constructor
+ * initialises FsmCodeGen itself in addition to both direct bases.
+ */
+struct DFTabCodeGen
+ : public FTabCodeGen, public DCodeGen
+{
+ DFTabCodeGen( ostream &out ) :
+ FsmCodeGen(out), FTabCodeGen(out), DCodeGen(out) {}
+};
+
+#endif /* _FTABCODEGEN_H */
diff --git a/contrib/tools/ragel5/rlgen-cd/gotocodegen.cpp b/contrib/tools/ragel5/rlgen-cd/gotocodegen.cpp
new file mode 100644
index 0000000000..13be67d097
--- /dev/null
+++ b/contrib/tools/ragel5/rlgen-cd/gotocodegen.cpp
@@ -0,0 +1,742 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Erich Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "rlgen-cd.h"
+#include "gotocodegen.h"
+#include "redfsm.h"
+#include "bstmap.h"
+#include "gendata.h"
+
+/* Write the jump for a transition: TABS(level) followed by a goto to the
+ * "tr<id>" label of the transition. No newline is written. */
+std::ostream &GotoCodeGen::TRANS_GOTO( RedTransAp *trans, int level )
+{
+    return out << TABS(level) << "goto tr" << trans->id << ";";
+}
+
+/* Write one switch case per action referenced as a to-state action. The
+ * case label is the action id and the body is the expanded action. */
+std::ostream &GotoCodeGen::TO_STATE_ACTION_SWITCH()
+{
+    for ( ActionList::Iter action = actionList; action.lte(); action++ ) {
+        /* Actions without to-state references get no case. */
+        if ( action->numToStateRefs <= 0 )
+            continue;
+
+        out << "\tcase " << action->actionId << ":\n";
+        ACTION( out, action, 0, false );
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Write one switch case per action referenced as a from-state action. The
+ * case label is the action id and the body is the expanded action. */
+std::ostream &GotoCodeGen::FROM_STATE_ACTION_SWITCH()
+{
+    for ( ActionList::Iter action = actionList; action.lte(); action++ ) {
+        /* Actions without from-state references get no case. */
+        if ( action->numFromStateRefs <= 0 )
+            continue;
+
+        out << "\tcase " << action->actionId << ":\n";
+        ACTION( out, action, 0, false );
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Write one switch case per action referenced as an EOF action. The case
+ * label is the action id; the action is expanded with the last ACTION()
+ * argument set to true. */
+std::ostream &GotoCodeGen::EOF_ACTION_SWITCH()
+{
+    for ( ActionList::Iter action = actionList; action.lte(); action++ ) {
+        /* Actions without EOF references get no case. */
+        if ( action->numEofRefs <= 0 )
+            continue;
+
+        out << "\tcase " << action->actionId << ":\n";
+        ACTION( out, action, 0, true );
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Write one switch case per action referenced by a transition. The case
+ * label is the action id and the body is the expanded action. */
+std::ostream &GotoCodeGen::ACTION_SWITCH()
+{
+    for ( ActionList::Iter action = actionList; action.lte(); action++ ) {
+        /* Actions without transition references get no case. */
+        if ( action->numTransRefs <= 0 )
+            continue;
+
+        out << "\tcase " << action->actionId << ":\n";
+        ACTION( out, action, 0, false );
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Write the case label that opens the code of a state; the label value is
+ * the state id. */
+void GotoCodeGen::GOTO_HEADER( RedStateAp *state )
+{
+    out << "case ";
+    out << state->id;
+    out << ":\n";
+}
+
+
+/* Emit the test of the single-key transitions of a state. One key is
+ * written as an if, several keys as a switch with one case per key and the
+ * output of SWITCH_DEFAULT() at the end. Nothing is written when the state
+ * has no single keys. */
+void GotoCodeGen::emitSingleSwitch( RedStateAp *state )
+{
+    RedTransEl *singles = state->outSingle.data;
+    const int count = state->outSingle.length();
+
+    if ( count < 1 )
+        return;
+
+    if ( count == 1 ) {
+        /* A lone key needs no switch. */
+        out << "\tif ( " << GET_WIDE_KEY(state) << " == " <<
+                KEY(singles[0].lowKey) << " )\n\t\t";
+
+        /* The jump taken when the key matches. */
+        TRANS_GOTO(singles[0].value, 0) << "\n";
+        return;
+    }
+
+    /* Two or more keys: one case per key. */
+    out << "\tswitch( " << GET_WIDE_KEY(state) << " ) {\n";
+
+    for ( int idx = 0; idx < count; idx++ ) {
+        out << "\t\tcase " << KEY(singles[idx].lowKey) << ": ";
+        TRANS_GOTO(singles[idx].value, 0) << "\n";
+    }
+
+    /* Emits a default case for D code. */
+    SWITCH_DEFAULT();
+
+    /* Close the switch. */
+    out << "\t}\n";
+}
+
+/* Emit a binary search over the range transitions low..high of a state as
+ * nested if/else statements on GET_WIDE_KEY(state). The range at the mid
+ * position is tested at this level and the ranges on either side are
+ * emitted recursively at level+1. A comparison is left out when the bound
+ * of the mid range equals keyOps->minKey or keyOps->maxKey. */
+void GotoCodeGen::emitRangeBSearch( RedStateAp *state, int level, int low, int high )
+{
+ /* Get the mid position, staying on the lower end of the range. */
+ int mid = (low + high) >> 1;
+ RedTransEl *data = state->outRange.data;
+
+ /* Determine if we need to look higher or lower. */
+ bool anyLower = mid > low;
+ bool anyHigher = mid < high;
+
+ /* Determine if the keys at mid are the limits of the alphabet. */
+ bool limitLow = data[mid].lowKey == keyOps->minKey;
+ bool limitHigh = data[mid].highKey == keyOps->maxKey;
+
+ if ( anyLower && anyHigher ) {
+ /* Can go lower and higher than mid. */
+ out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " < " <<
+ KEY(data[mid].lowKey) << " ) {\n";
+ emitRangeBSearch( state, level+1, low, mid-1 );
+ out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " > " <<
+ KEY(data[mid].highKey) << " ) {\n";
+ emitRangeBSearch( state, level+1, mid+1, high );
+ out << TABS(level) << "} else\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ else if ( anyLower && !anyHigher ) {
+ /* Can go lower than mid but not higher. */
+ out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " < " <<
+ KEY(data[mid].lowKey) << " ) {\n";
+ emitRangeBSearch( state, level+1, low, mid-1 );
+
+ /* if the higher is the highest in the alphabet then there is no
+ * sense testing it. */
+ if ( limitHigh ) {
+ out << TABS(level) << "} else\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ else {
+ out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " <= " <<
+ KEY(data[mid].highKey) << " )\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ }
+ else if ( !anyLower && anyHigher ) {
+ /* Can go higher than mid but not lower. */
+ out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " > " <<
+ KEY(data[mid].highKey) << " ) {\n";
+ emitRangeBSearch( state, level+1, mid+1, high );
+
+ /* If the lower end is the lowest in the alphabet then there is no
+ * sense testing it. */
+ if ( limitLow ) {
+ out << TABS(level) << "} else\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ else {
+ out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " >= " <<
+ KEY(data[mid].lowKey) << " )\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ }
+ else {
+ /* Cannot go higher or lower than mid. It's mid or bust. What
+ * tests to do depends on limits of alphabet. */
+ if ( !limitLow && !limitHigh ) {
+ out << TABS(level) << "if ( " << KEY(data[mid].lowKey) << " <= " <<
+ GET_WIDE_KEY(state) << " && " << GET_WIDE_KEY(state) << " <= " <<
+ KEY(data[mid].highKey) << " )\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ else if ( limitLow && !limitHigh ) {
+ out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " <= " <<
+ KEY(data[mid].highKey) << " )\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ else if ( !limitLow && limitHigh ) {
+ out << TABS(level) << "if ( " << KEY(data[mid].lowKey) << " <= " <<
+ GET_WIDE_KEY(state) << " )\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ else {
+ /* Both high and low are at the limit. No tests to do. */
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ }
+}
+
+void GotoCodeGen::STATE_GOTO_ERROR()
+{
+ /* Label the state and bail immediately. */
+ outLabelUsed = true;
+ RedStateAp *state = redFsm->errState;
+ out << "case " << state->id << ":\n";
+ out << " goto _out;\n";
+}
+
+/* Emit the computation of _widec for one condition range. _widec is first
+ * set to the base key of the condition space plus the distance of the key
+ * from keyOps->minKey. Then, for every condition of the space, an if is
+ * emitted that adds (1 << position) * alphSize() when the condition holds. */
+void GotoCodeGen::COND_TRANSLATE( StateCond *stateCond, int level )
+{
+    CondSpace *space = stateCond->condSpace;
+
+    out << TABS(level) << "_widec = " << CAST(WIDE_ALPH_TYPE()) << "(" <<
+            KEY(space->baseKey) << " + (" << GET_KEY() <<
+            " - " << KEY(keyOps->minKey) << "));\n";
+
+    for ( CondSet::Iter cond = space->condSet; cond.lte(); cond++ ) {
+        /* Amount added for this condition, fixed by its position. */
+        Size widecStep = ((1 << cond.pos()) * keyOps->alphSize());
+
+        out << TABS(level) << "if ( ";
+        CONDITION( out, *cond );
+        out << " ) _widec += " << widecStep << ";\n";
+    }
+}
+
+/* Emit a binary search over the condition ranges low..high of a state as
+ * nested if/else statements on GET_KEY(). The range at the mid position is
+ * tested at this level and the ranges on either side are emitted
+ * recursively at level+1. When the key lies in a range, the code written by
+ * COND_TRANSLATE() for that range runs; no code is emitted for keys outside
+ * every range. A comparison is left out when the bound of the mid range
+ * equals keyOps->minKey or keyOps->maxKey.
+ *
+ * Every tested branch opens its block with " ) {" followed by a newline, so
+ * that the translated statements always start on their own line. */
+void GotoCodeGen::emitCondBSearch( RedStateAp *state, int level, int low, int high )
+{
+    /* Get the mid position, staying on the lower end of the range. */
+    int mid = (low + high) >> 1;
+    StateCond **data = state->stateCondVect.data;
+
+    /* Determine if we need to look higher or lower. */
+    bool anyLower = mid > low;
+    bool anyHigher = mid < high;
+
+    /* Determine if the keys at mid are the limits of the alphabet. */
+    bool limitLow = data[mid]->lowKey == keyOps->minKey;
+    bool limitHigh = data[mid]->highKey == keyOps->maxKey;
+
+    if ( anyLower && anyHigher ) {
+        /* Can go lower and higher than mid. */
+        out << TABS(level) << "if ( " << GET_KEY() << " < " <<
+                KEY(data[mid]->lowKey) << " ) {\n";
+        emitCondBSearch( state, level+1, low, mid-1 );
+        out << TABS(level) << "} else if ( " << GET_KEY() << " > " <<
+                KEY(data[mid]->highKey) << " ) {\n";
+        emitCondBSearch( state, level+1, mid+1, high );
+        out << TABS(level) << "} else {\n";
+        COND_TRANSLATE(data[mid], level+1);
+        out << TABS(level) << "}\n";
+    }
+    else if ( anyLower && !anyHigher ) {
+        /* Can go lower than mid but not higher. */
+        out << TABS(level) << "if ( " << GET_KEY() << " < " <<
+                KEY(data[mid]->lowKey) << " ) {\n";
+        emitCondBSearch( state, level+1, low, mid-1 );
+
+        /* If the higher is the highest in the alphabet then there is no
+         * sense testing it. */
+        if ( limitHigh ) {
+            out << TABS(level) << "} else {\n";
+            COND_TRANSLATE(data[mid], level+1);
+            out << TABS(level) << "}\n";
+        }
+        else {
+            out << TABS(level) << "} else if ( " << GET_KEY() << " <= " <<
+                    KEY(data[mid]->highKey) << " ) {\n";
+            COND_TRANSLATE(data[mid], level+1);
+            out << TABS(level) << "}\n";
+        }
+    }
+    else if ( !anyLower && anyHigher ) {
+        /* Can go higher than mid but not lower. */
+        out << TABS(level) << "if ( " << GET_KEY() << " > " <<
+                KEY(data[mid]->highKey) << " ) {\n";
+        emitCondBSearch( state, level+1, mid+1, high );
+
+        /* If the lower end is the lowest in the alphabet then there is no
+         * sense testing it. */
+        if ( limitLow ) {
+            out << TABS(level) << "} else {\n";
+            COND_TRANSLATE(data[mid], level+1);
+            out << TABS(level) << "}\n";
+        }
+        else {
+            out << TABS(level) << "} else if ( " << GET_KEY() << " >= " <<
+                    KEY(data[mid]->lowKey) << " ) {\n";
+            COND_TRANSLATE(data[mid], level+1);
+            out << TABS(level) << "}\n";
+        }
+    }
+    else {
+        /* Cannot go higher or lower than mid. It's mid or bust. What
+         * tests to do depends on limits of alphabet. */
+        if ( !limitLow && !limitHigh ) {
+            out << TABS(level) << "if ( " << KEY(data[mid]->lowKey) << " <= " <<
+                    GET_KEY() << " && " << GET_KEY() << " <= " <<
+                    KEY(data[mid]->highKey) << " ) {\n";
+            COND_TRANSLATE(data[mid], level+1);
+            out << TABS(level) << "}\n";
+        }
+        else if ( limitLow && !limitHigh ) {
+            out << TABS(level) << "if ( " << GET_KEY() << " <= " <<
+                    KEY(data[mid]->highKey) << " ) {\n";
+            COND_TRANSLATE(data[mid], level+1);
+            out << TABS(level) << "}\n";
+        }
+        else if ( !limitLow && limitHigh ) {
+            /* Open the block the same way as the sibling branches: brace
+             * on the if line, newline after it. */
+            out << TABS(level) << "if ( " << KEY(data[mid]->lowKey) << " <= " <<
+                    GET_KEY() << " ) {\n";
+            COND_TRANSLATE(data[mid], level+1);
+            out << TABS(level) << "}\n";
+        }
+        else {
+            /* Both high and low are at the limit. No tests to do. */
+            COND_TRANSLATE(data[mid], level);
+        }
+    }
+}
+
+std::ostream &GotoCodeGen::STATE_GOTOS()
+{
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ if ( st == redFsm->errState )
+ STATE_GOTO_ERROR();
+ else {
+ /* Writing code above state gotos. */
+ GOTO_HEADER( st );
+
+ if ( st->stateCondVect.length() > 0 ) {
+ out << " _widec = " << GET_KEY() << ";\n";
+ emitCondBSearch( st, 1, 0, st->stateCondVect.length() - 1 );
+ }
+
+ /* Try singles. */
+ if ( st->outSingle.length() > 0 )
+ emitSingleSwitch( st );
+
+ /* Default case is to binary search for the ranges, if that fails then */
+ if ( st->outRange.length() > 0 )
+ emitRangeBSearch( st, 1, 0, st->outRange.length() - 1 );
+
+ /* Write the default transition. */
+ TRANS_GOTO( st->defTrans, 1 ) << "\n";
+ }
+ }
+ return out;
+}
+
+/* Write the "tr<id>" label of every transition. The code under a label
+ * saves the current state in _ps when the action of the transition refers
+ * to it, sets the current state to the target and jumps to the "f<id>"
+ * label of the action list, or to _again when there is no action. */
+std::ostream &GotoCodeGen::TRANSITIONS()
+{
+    for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) {
+        const bool hasAction = trans->action != 0;
+
+        /* Label that the state code jumps to. */
+        out << " tr" << trans->id << ": ";
+
+        /* Keep the old state around if the action needs it. */
+        if ( hasAction && trans->action->anyCurStateRef() )
+            out << "_ps = " << CS() << ";";
+
+        /* Move to the target state. */
+        out << CS() << " = " << trans->targ->id << "; ";
+
+        if ( !hasAction ) {
+            /* Nothing to execute, go around again. */
+            out << "goto _again;\n";
+        }
+        else {
+            /* Run the action list of the transition. */
+            out << "goto f" << trans->action->actListId << ";\n";
+        }
+    }
+    return out;
+}
+
+/* Write the "f<id>" label of every action list referenced by a transition.
+ * Each label points _acts at location+1 of the list in the actions array
+ * and jumps to execFuncs. The execFuncs loop written afterwards reads a
+ * count from _acts, switches on each following entry and ends with a jump
+ * to _again. */
+std::ostream &GotoCodeGen::EXEC_FUNCS()
+{
+    for ( ActionTableMap::Iter actList = redFsm->actionMap; actList.lte(); actList++ ) {
+        /* Only lists used on transitions get a label. */
+        if ( actList->numTransRefs <= 0 )
+            continue;
+
+        out << " f" << actList->actListId << ": ";
+        out << "_acts = " << ARR_OFF(A(), itoa( actList->location+1 ) ) << ";"
+            << " goto execFuncs;\n";
+    }
+
+    out <<
+        "\n"
+        "execFuncs:\n"
+        " _nacts = *_acts++;\n"
+        " while ( _nacts-- > 0 ) {\n"
+        " switch ( *_acts++ ) {\n";
+    ACTION_SWITCH();
+    SWITCH_DEFAULT() <<
+        " }\n"
+        " }\n"
+        " goto _again;\n";
+    return out;
+}
+
+/* Return the to-state action table value of a state: location+1 of its
+ * to-state action, or zero when it has none. */
+unsigned int GotoCodeGen::TO_STATE_ACTION( RedStateAp *state )
+{
+    if ( state->toStateAction == 0 )
+        return 0;
+
+    int loc = state->toStateAction->location+1;
+    return loc;
+}
+
+/* Return the from-state action table value of a state: location+1 of its
+ * from-state action, or zero when it has none. */
+unsigned int GotoCodeGen::FROM_STATE_ACTION( RedStateAp *state )
+{
+    if ( state->fromStateAction == 0 )
+        return 0;
+
+    int loc = state->fromStateAction->location+1;
+    return loc;
+}
+
+/* Return the EOF action table value of a state: location+1 of its EOF
+ * action, or zero when it has none. */
+unsigned int GotoCodeGen::EOF_ACTION( RedStateAp *state )
+{
+    if ( state->eofAction == 0 )
+        return 0;
+
+    int loc = state->eofAction->location+1;
+    return loc;
+}
+
+/* Write the body of the to-state action table: one comma separated entry
+ * per state id below redFsm->nextStateId, wrapped after every IALL entries.
+ * The entry of an id is the TO_STATE_ACTION() value of the state carrying
+ * that id; ids that no state carries are written as zero. */
+std::ostream &GotoCodeGen::TO_STATE_ACTIONS()
+{
+    /* The buffer is indexed by state id and read up to nextStateId, so it
+     * is sized to cover both that bound and the number of states. This
+     * assumes every state id is below the larger of the two. */
+    int numStates = redFsm->stateList.length();
+    int numIds = redFsm->nextStateId;
+    int numVals = numIds > numStates ? numIds : numStates;
+
+    /* The trailing () value-initialises every entry to zero. */
+    unsigned int *vals = new unsigned int[numVals]();
+
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+        vals[st->id] = TO_STATE_ACTION(st);
+
+    out << "\t";
+    for ( int st = 0; st < numIds; st++ ) {
+        /* Write the to-state action value of this state id. */
+        out << vals[st];
+
+        /* Separator after every entry but the last one written. */
+        if ( st < numIds-1 ) {
+            out << ", ";
+            if ( (st+1) % IALL == 0 )
+                out << "\n\t";
+        }
+    }
+    out << "\n";
+    delete[] vals;
+    return out;
+}
+
+/* Write the body of the from-state action table: one comma separated entry
+ * per state id below redFsm->nextStateId, wrapped after every IALL entries.
+ * The entry of an id is the FROM_STATE_ACTION() value of the state carrying
+ * that id; ids that no state carries are written as zero. */
+std::ostream &GotoCodeGen::FROM_STATE_ACTIONS()
+{
+    /* The buffer is indexed by state id and read up to nextStateId, so it
+     * is sized to cover both that bound and the number of states. This
+     * assumes every state id is below the larger of the two. */
+    int numStates = redFsm->stateList.length();
+    int numIds = redFsm->nextStateId;
+    int numVals = numIds > numStates ? numIds : numStates;
+
+    /* The trailing () value-initialises every entry to zero. */
+    unsigned int *vals = new unsigned int[numVals]();
+
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+        vals[st->id] = FROM_STATE_ACTION(st);
+
+    out << "\t";
+    for ( int st = 0; st < numIds; st++ ) {
+        /* Write the from-state action value of this state id. */
+        out << vals[st];
+
+        /* Separator after every entry but the last one written. */
+        if ( st < numIds-1 ) {
+            out << ", ";
+            if ( (st+1) % IALL == 0 )
+                out << "\n\t";
+        }
+    }
+    out << "\n";
+    delete[] vals;
+    return out;
+}
+
+/* Write the body of the EOF action table: one comma separated entry per
+ * state id below redFsm->nextStateId, wrapped after every IALL entries.
+ * The entry of an id is the EOF_ACTION() value of the state carrying that
+ * id; ids that no state carries are written as zero. */
+std::ostream &GotoCodeGen::EOF_ACTIONS()
+{
+    /* The buffer is indexed by state id and read up to nextStateId, so it
+     * is sized to cover both that bound and the number of states. This
+     * assumes every state id is below the larger of the two. */
+    int numStates = redFsm->stateList.length();
+    int numIds = redFsm->nextStateId;
+    int numVals = numIds > numStates ? numIds : numStates;
+
+    /* The trailing () value-initialises every entry to zero. */
+    unsigned int *vals = new unsigned int[numVals]();
+
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+        vals[st->id] = EOF_ACTION(st);
+
+    out << "\t";
+    for ( int st = 0; st < numIds; st++ ) {
+        /* Write the EOF action value of this state id. */
+        out << vals[st];
+
+        /* Separator after every entry but the last one written. */
+        if ( st < numIds-1 ) {
+            out << ", ";
+            if ( (st+1) % IALL == 0 )
+                out << "\n\t";
+        }
+    }
+    out << "\n";
+    delete[] vals;
+    return out;
+}
+
+/* Write one case per state that has an EOF action. The case is labelled
+ * with the state id and jumps to the "f<id>" label of the action list. */
+std::ostream &GotoCodeGen::FINISH_CASES()
+{
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* States without an EOF action need no case. */
+        if ( st->eofAction == 0 )
+            continue;
+
+        out << "\t\tcase " << st->id << ": "
+            << "goto f" << st->eofAction->actListId << ";\n";
+    }
+
+    return out;
+}
+
+/* Write a goto statement of an action: set the current state to gotoDest
+ * and jump to _again. inFinish is not used here. */
+void GotoCodeGen::GOTO( ostream &ret, int gotoDest, bool inFinish )
+{
+    ret << "{" << CS() << " = " << gotoDest << "; ";
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Write a goto whose destination is an expression: the current state is
+ * assigned the expanded children of ilItem, then control jumps to _again. */
+void GotoCodeGen::GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish )
+{
+    ret << "{";
+    ret << CS() << " = (";
+    INLINE_LIST( ret, ilItem->children, 0, inFinish );
+    ret << "); ";
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Write the expression for the state the machine was in before the
+ * transition: the _ps variable. inFinish is not used here. */
+void GotoCodeGen::CURS( ostream &ret, bool inFinish )
+{
+    const char *prevStateExpr = "(_ps)";
+    ret << prevStateExpr;
+}
+
+/* Write the expression for the target state: the current state variable in
+ * parentheses. inFinish and targState are not used here. */
+void GotoCodeGen::TARGS( ostream &ret, bool inFinish, int targState )
+{
+    ret << "(";
+    ret << CS();
+    ret << ")";
+}
+
+/* Write a next statement of an action: assign nextDest to the current
+ * state without any jump. inFinish is not used here. */
+void GotoCodeGen::NEXT( ostream &ret, int nextDest, bool inFinish )
+{
+    ret << CS() << " = ";
+    ret << nextDest << ";";
+}
+
+/* Write a next statement whose destination is an expression: the current
+ * state is assigned the expanded children of ilItem, without any jump. */
+void GotoCodeGen::NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish )
+{
+    ret << CS();
+    ret << " = (";
+    INLINE_LIST( ret, ilItem->children, 0, inFinish );
+    ret << ");";
+}
+
+/* Write a call statement of an action: push the current state on the
+ * stack, set the current state to callDest and jump to _again. targState
+ * and inFinish are not used here. */
+void GotoCodeGen::CALL( ostream &ret, int callDest, int targState, bool inFinish )
+{
+    ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; ";
+    ret << CS() << " = " << callDest << "; ";
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Write a call whose destination is an expression: push the current state
+ * on the stack, assign the expanded children of ilItem to the current state
+ * and jump to _again. */
+void GotoCodeGen::CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish )
+{
+    ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; ";
+    ret << CS() << " = (";
+    INLINE_LIST( ret, ilItem->children, targState, inFinish );
+    ret << "); ";
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Write a return statement of an action: pop the stack into the current
+ * state and jump to _again. inFinish is not used here. */
+void GotoCodeGen::RET( ostream &ret, bool inFinish )
+{
+    ret << "{" << CS() << " = " << STACK() << "[--" << TOP() << "]; ";
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Write a break statement of an action: a jump to _out. Also records that
+ * the _out label is needed. targState is not used here. */
+void GotoCodeGen::BREAK( ostream &ret, int targState )
+{
+    outLabelUsed = true;
+
+    ret << CTRL_FLOW();
+    ret << "goto _out;";
+}
+
+/* Emit the static tables of the machine: the actions array and the
+ * to-state, from-state and EOF action tables, each only when the machine
+ * has such actions, followed by whatever STATE_IDS() writes. */
+void GotoCodeGen::writeData()
+{
+    /* Actions array. */
+    if ( redFsm->anyActions() ) {
+        OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActArrItem), A() );
+        ACTIONS_ARRAY();
+        CLOSE_ARRAY() << "\n";
+    }
+
+    /* To-state action table. */
+    if ( redFsm->anyToStateActions() ) {
+        OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), TSA() );
+        TO_STATE_ACTIONS();
+        CLOSE_ARRAY() << "\n";
+    }
+
+    /* From-state action table. */
+    if ( redFsm->anyFromStateActions() ) {
+        OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), FSA() );
+        FROM_STATE_ACTIONS();
+        CLOSE_ARRAY() << "\n";
+    }
+
+    /* EOF action table. */
+    if ( redFsm->anyEofActions() ) {
+        OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), EA() );
+        EOF_ACTIONS();
+        CLOSE_ARRAY() << "\n";
+    }
+
+    STATE_IDS();
+}
+
+/* Emit the execution block of the machine: local declarations, the _resume
+ * loop with the optional from-state action loop, the switch over the
+ * current state, the transition labels, the action execution labels, the
+ * optional to-state action loop and the advance of the input pointer. The
+ * _out label is written only when some emitted code jumps to it, which is
+ * tracked in outLabelUsed. */
+void GotoCodeGen::writeExec()
+{
+ outLabelUsed = false;
+
+ out << " {\n";
+
+ /* _ps is only declared when anyRegCurStateRef() reports a use. */
+ if ( redFsm->anyRegCurStateRef() )
+ out << " int _ps = 0;\n";
+
+ /* _acts and _nacts are used by the emitted action loops. */
+ if ( redFsm->anyToStateActions() || redFsm->anyRegActions()
+ || redFsm->anyFromStateActions() )
+ {
+ out <<
+ " " << PTR_CONST() << ARRAY_TYPE(redFsm->maxActArrItem) << POINTER() << "_acts;\n"
+ " " << UINT() << " _nacts;\n";
+ }
+
+ if ( redFsm->anyConditions() )
+ out << " " << WIDE_ALPH_TYPE() << " _widec;\n";
+
+ out << "\n";
+
+ /* With an end pointer, leave immediately when there is no input. */
+ if ( hasEnd ) {
+ outLabelUsed = true;
+ out <<
+ " if ( " << P() << " == " << PE() << " )\n"
+ " goto _out;\n";
+ }
+
+ out << "_resume:\n";
+
+ /* From-state actions: the table entry of the current state is an offset
+ * into the actions array, where a count is followed by the action ids. */
+ if ( redFsm->anyFromStateActions() ) {
+ out <<
+ " _acts = " << ARR_OFF( A(), FSA() + "[" + CS() + "]" ) << ";\n"
+ " _nacts = " << CAST(UINT()) << " *_acts++;\n"
+ " while ( _nacts-- > 0 ) {\n"
+ " switch ( *_acts++ ) {\n";
+ FROM_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ "\n";
+ }
+
+ /* The switch over the current state, followed by the transition labels
+ * that the state code jumps to. */
+ out <<
+ " switch ( " << CS() << " ) {\n";
+ STATE_GOTOS();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ "\n";
+ TRANSITIONS() <<
+ "\n";
+
+ if ( redFsm->anyRegActions() )
+ EXEC_FUNCS() << "\n";
+
+ out << "_again:\n";
+
+ /* To-state actions: same scheme as the from-state actions above, using
+ * the to-state table and the new current state. */
+ if ( redFsm->anyToStateActions() ) {
+ out <<
+ " _acts = " << ARR_OFF( A(), TSA() + "[" + CS() + "]" ) << ";\n"
+ " _nacts = " << CAST(UINT()) << " *_acts++;\n"
+ " while ( _nacts-- > 0 ) {\n"
+ " switch ( *_acts++ ) {\n";
+ TO_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ "\n";
+ }
+
+ /* Advance the input pointer and loop; with an end pointer the loop stops
+ * when the end is reached. */
+ if ( hasEnd ) {
+ out <<
+ " if ( ++" << P() << " != " << PE() << " )\n"
+ " goto _resume;\n";
+ }
+ else {
+ out <<
+ " " << P() << " += 1;\n"
+ " goto _resume;\n";
+ }
+
+ if ( outLabelUsed )
+ out << " _out: {}\n";
+
+ out << " }\n";
+}
+
+/* Emit the EOF handling block. The emitted code points _acts at the EOF
+ * action entry of the current state, reads the count stored there and
+ * switches on each following action id. Nothing is written when the
+ * machine has no EOF actions. */
+void GotoCodeGen::writeEOF()
+{
+    if ( !redFsm->anyEofActions() )
+        return;
+
+    out <<
+        " {\n"
+        " " << PTR_CONST() << ARRAY_TYPE(redFsm->maxActArrItem) << POINTER() << "_acts = " <<
+        ARR_OFF( A(), EA() + "[" + CS() + "]" ) << ";\n"
+        " " << UINT() << " _nacts = " << CAST(UINT()) << " *_acts++;\n"
+        " while ( _nacts-- > 0 ) {\n"
+        " switch ( *_acts++ ) {\n";
+    EOF_ACTION_SWITCH();
+    SWITCH_DEFAULT() <<
+        " }\n"
+        " }\n"
+        " }\n"
+        "\n";
+}
diff --git a/contrib/tools/ragel5/rlgen-cd/gotocodegen.h b/contrib/tools/ragel5/rlgen-cd/gotocodegen.h
new file mode 100644
index 0000000000..625c2c23bd
--- /dev/null
+++ b/contrib/tools/ragel5/rlgen-cd/gotocodegen.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Erich Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _GOTOCODEGEN_H
+#define _GOTOCODEGEN_H
+
+#include <iostream>
+#include "fsmcodegen.h"
+
+/* Forwards. */
+struct CodeGenData;
+struct NameInst;
+struct RedTransAp;
+struct RedStateAp;
+struct StateCond;
+
+/*
+ * Goto driven fsm.
+ */
+class GotoCodeGen : virtual public FsmCodeGen
+{
+public:
+ /* Forwards the output stream to the (virtual) FsmCodeGen base. */
+ GotoCodeGen( ostream &out ) : FsmCodeGen(out) {}
+
+ /* Emitters for pieces of the generated machine. Each returns a stream
+ * reference so callers can keep appending to it. */
+ std::ostream &TO_STATE_ACTION_SWITCH();
+ std::ostream &FROM_STATE_ACTION_SWITCH();
+ std::ostream &EOF_ACTION_SWITCH();
+ std::ostream &ACTION_SWITCH();
+ std::ostream &STATE_GOTOS();
+ std::ostream &TRANSITIONS();
+ std::ostream &EXEC_FUNCS();
+ std::ostream &FINISH_CASES();
+
+ /* Emitters for control statements embedded in user actions; output goes
+ * to the caller-supplied stream 'ret'.
+ * NOTE(review): presumably these implement fgoto/fcall/fnext/fret etc. --
+ * confirm against fsmcodegen.h. */
+ void GOTO( ostream &ret, int gotoDest, bool inFinish );
+ void CALL( ostream &ret, int callDest, int targState, bool inFinish );
+ void NEXT( ostream &ret, int nextDest, bool inFinish );
+ void GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish );
+ void NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish );
+ void CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish );
+ void CURS( ostream &ret, bool inFinish );
+ void TARGS( ostream &ret, bool inFinish, int targState );
+ void RET( ostream &ret, bool inFinish );
+ void BREAK( ostream &ret, int targState );
+
+ /* Per-state action lookups; virtual so derived generators can override. */
+ virtual unsigned int TO_STATE_ACTION( RedStateAp *state );
+ virtual unsigned int FROM_STATE_ACTION( RedStateAp *state );
+ virtual unsigned int EOF_ACTION( RedStateAp *state );
+
+ std::ostream &TO_STATE_ACTIONS();
+ std::ostream &FROM_STATE_ACTIONS();
+ std::ostream &EOF_ACTIONS();
+
+ /* Condition handling helpers. */
+ void COND_TRANSLATE( StateCond *stateCond, int level );
+ void emitCondBSearch( RedStateAp *state, int level, int low, int high );
+ void STATE_CONDS( RedStateAp *state, bool genDefault );
+
+ /* Emits the jump for one transition; overridden by derived generators. */
+ virtual std::ostream &TRANS_GOTO( RedTransAp *trans, int level );
+
+ void emitSingleSwitch( RedStateAp *state );
+ void emitRangeBSearch( RedStateAp *state, int level, int low, int high );
+
+ /* Called from STATE_GOTOS just before writing the gotos */
+ virtual void GOTO_HEADER( RedStateAp *state );
+ virtual void STATE_GOTO_ERROR();
+
+ /* Top-level entry points that write the data, EOF and exec sections. */
+ virtual void writeData();
+ virtual void writeEOF();
+ virtual void writeExec();
+};
+
+/*
+ * class CGotoCodeGen
+ */
+/* Goto-driven generator paired with the C code generator base. */
+struct CGotoCodeGen : public GotoCodeGen, public CCodeGen
+{
+    /* The shared virtual base FsmCodeGen is initialised here as well. */
+    CGotoCodeGen( ostream &out )
+        : FsmCodeGen(out), GotoCodeGen(out), CCodeGen(out)
+    {
+    }
+};
+
+/*
+ * class DGotoCodeGen
+ */
+/* Goto-driven generator paired with the D code generator base. */
+struct DGotoCodeGen : public GotoCodeGen, public DCodeGen
+{
+    /* The shared virtual base FsmCodeGen is initialised here as well. */
+    DGotoCodeGen( ostream &out )
+        : FsmCodeGen(out), GotoCodeGen(out), DCodeGen(out)
+    {
+    }
+};
+
+
+#endif /* _GOTOCODEGEN_H */
diff --git a/contrib/tools/ragel5/rlgen-cd/ipgotocodegen.cpp b/contrib/tools/ragel5/rlgen-cd/ipgotocodegen.cpp
new file mode 100644
index 0000000000..ed65be5fe0
--- /dev/null
+++ b/contrib/tools/ragel5/rlgen-cd/ipgotocodegen.cpp
@@ -0,0 +1,414 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Erich Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "rlgen-cd.h"
+#include "ipgotocodegen.h"
+#include "redfsm.h"
+#include "gendata.h"
+#include "bstmap.h"
+
+/* Report whether the generated code needs the _again label: true when any
+ * regular action contains a ret, a by-value control statement or a next
+ * statement. The checks short-circuit in that order. */
+bool IpGotoCodeGen::useAgainLabel()
+{
+    if ( redFsm->anyRegActionRets() )
+        return true;
+    if ( redFsm->anyRegActionByValControl() )
+        return true;
+    return redFsm->anyRegNextStmt();
+}
+
+/* Emit a braced direct jump to the label of state gotoDest. The inFinish
+ * flag is not used by this generator. */
+void IpGotoCodeGen::GOTO( ostream &ret, int gotoDest, bool inFinish )
+{
+    ret << "{" << CTRL_FLOW();
+    ret << "goto st" << gotoDest << ";}";
+}
+
+/* Emit a call: store targState on the generated stack, bump the stack top,
+ * then jump directly to the label of state callDest. */
+void IpGotoCodeGen::CALL( ostream &ret, int callDest, int targState, bool inFinish )
+{
+    /* Stack push of the state to come back to. */
+    ret << "{" << STACK() << "[" << TOP() << "++] = " << targState << "; ";
+
+    /* Direct jump to the callee's state label. */
+    ret << CTRL_FLOW() << "goto st" << callDest << ";}";
+}
+
+/* Emit a return: load the current state from the popped stack slot and
+ * jump to _again so the state switch re-dispatches. */
+void IpGotoCodeGen::RET( ostream &ret, bool inFinish )
+{
+    ret << "{" << CS() << " = " << STACK() << "[--" << TOP() << "]; ";
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Emit a computed goto: assign the user expression (the children of
+ * ilItem) to the current state, then jump to _again. */
+void IpGotoCodeGen::GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish )
+{
+    /* Left-hand side and opening parenthesis of the assignment. */
+    ret << "{";
+    ret << CS() << " = (";
+
+    /* The expression text itself. */
+    INLINE_LIST( ret, ilItem->children, 0, inFinish );
+
+    /* Close the assignment and re-dispatch. */
+    ret << "); ";
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Emit a computed call: push targState on the generated stack, assign the
+ * user expression to the current state, then jump to _again. */
+void IpGotoCodeGen::CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish )
+{
+    /* Stack push of the state to come back to. */
+    ret << "{" << STACK() << "[" << TOP() << "++] = " << targState << "; ";
+
+    /* Assignment of the computed destination. */
+    ret << CS() << " = (";
+    INLINE_LIST( ret, ilItem->children, 0, inFinish );
+    ret << "); ";
+
+    /* Re-dispatch through the _again switch. */
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Emit an assignment of the constant nextDest to the current state. No
+ * jump is generated. */
+void IpGotoCodeGen::NEXT( ostream &ret, int nextDest, bool inFinish )
+{
+    ret << CS();
+    ret << " = " << nextDest << ";";
+}
+
+/* Emit an assignment of the user expression (the children of ilItem) to
+ * the current state. No jump is generated. */
+void IpGotoCodeGen::NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish )
+{
+    /* Left-hand side. */
+    ret << CS();
+    ret << " = (";
+
+    /* Expression text, followed by the closing parenthesis. */
+    INLINE_LIST( ret, ilItem->children, 0, inFinish );
+    ret << ");";
+}
+
+/* Emit a reference to the generated local _ps, which GOTO_HEADER assigns
+ * for states that reference the current state and which writeExec declares.
+ * The inFinish flag is not used. */
+void IpGotoCodeGen::CURS( ostream &ret, bool inFinish )
+{
+ ret << "(_ps)";
+}
+
+/* Emit the numeric id passed as targState. The inFinish flag is not used. */
+void IpGotoCodeGen::TARGS( ostream &ret, bool inFinish, int targState )
+{
+ ret << targState;
+}
+
+/* Emit a jump to the per-state exit label _out<targState>; EXIT_STATES
+ * writes those labels. */
+void IpGotoCodeGen::BREAK( ostream &ret, int targState )
+{
+    ret << CTRL_FLOW();
+    ret << "goto _out" << targState << ";";
+}
+
+/* Emit the labelled action code for each incoming transition of 'state'
+ * that carries an action and needs a label. Returns true when at least one
+ * such transition was written, so the caller knows to emit a line
+ * directive afterwards. */
+bool IpGotoCodeGen::IN_TRANS_ACTIONS( RedStateAp *state )
+{
+    bool wroteAny = false;
+
+    for ( int idx = 0; idx < state->numInTrans; idx++ ) {
+        RedTransAp *inTrans = state->inTrans[idx];
+
+        /* Skip transitions without actions or without a label. */
+        if ( inTrans->action == 0 || !inTrans->labelNeeded )
+            continue;
+
+        wroteAny = true;
+
+        /* Label that TRANS_GOTO jumps to. */
+        out << "tr" << inTrans->id << ":\n";
+
+        /* An action containing a next statement may or may not assign the
+         * current state, so preload it with the transition target. */
+        if ( inTrans->action->anyNextStmt() )
+            out << " " << CS() << " = " << inTrans->targ->id << ";\n";
+
+        /* The action bodies, in table order. */
+        for ( ActionTable::Iter act = inTrans->action->key; act.lte(); act++ )
+            ACTION( out, act->value, inTrans->targ->id, false );
+
+        /* With a next statement the state must be re-read through _again;
+         * otherwise jump straight to the target state. */
+        if ( inTrans->action->anyNextStmt() )
+            out << "\tgoto _again;\n";
+        else
+            out << "\tgoto st" << inTrans->targ->id << ";\n";
+    }
+
+    return wroteAny;
+}
+
+/* Invoked by GotoCodeGen::STATE_GOTOS ahead of the gotos of each state.
+ * Writes, in order: incoming transition actions, the state label, to-state
+ * actions, the input pointer advance, the switch case label, from-state
+ * actions, a line directive if any action was written, and the previous
+ * state record. */
+void IpGotoCodeGen::GOTO_HEADER( RedStateAp *state )
+{
+    bool needDirective = IN_TRANS_ACTIONS( state );
+
+    if ( state->labelNeeded )
+        out << "st" << state->id << ":\n";
+
+    /* To-state actions run on entry, before the pointer is advanced. */
+    if ( state->toStateAction != 0 ) {
+        needDirective = true;
+        ActionTable::Iter toAct = state->toStateAction->key;
+        while ( toAct.lte() ) {
+            ACTION( out, toAct->value, state->id, false );
+            toAct++;
+        }
+    }
+
+    /* Advance the buffer position, testing for the end when there is one. */
+    if ( state->labelNeeded && hasEnd ) {
+        out << " if ( ++" << P() << " == " << PE() << " )\n";
+        out << " goto _out" << state->id << ";\n";
+    }
+    else if ( state->labelNeeded ) {
+        out << " " << P() << " += 1;\n";
+    }
+
+    /* Entry point used by the switch on the current state. */
+    out << "case " << state->id << ":\n";
+
+    /* From-state actions run before the transition out of this state. */
+    if ( state->fromStateAction != 0 ) {
+        needDirective = true;
+        ActionTable::Iter fromAct = state->fromStateAction->key;
+        while ( fromAct.lte() ) {
+            ACTION( out, fromAct->value, state->id, false );
+            fromAct++;
+        }
+    }
+
+    if ( needDirective )
+        genLineDirective( out );
+
+    /* Record the previous state when actions refer to it. */
+    if ( state->anyRegCurStateRef() )
+        out << " _ps = " << state->id << ";\n";
+}
+
+void IpGotoCodeGen::STATE_GOTO_ERROR()
+{
+ /* In the error state we need to emit some stuff that usually goes into
+ * the header. */
+ RedStateAp *state = redFsm->errState;
+ bool anyWritten = IN_TRANS_ACTIONS( state );
+
+ /* No case label needed since we don't switch on the error state. */
+ if ( anyWritten )
+ genLineDirective( out );
+
+ if ( state->labelNeeded )
+ out << "st" << state->id << ":\n";
+
+ /* Break out here. */
+ out << " goto _out" << state->id << ";\n";
+}
+
+
+/* Write the jump for one transition at the given indent level. Transitions
+ * with an action go to their tr<id> label (which runs the action and then
+ * continues to the target); the rest go straight to the target state. */
+std::ostream &IpGotoCodeGen::TRANS_GOTO( RedTransAp *trans, int level )
+{
+    out << TABS(level);
+
+    if ( trans->action == 0 )
+        out << "goto st" << trans->targ->id << ";";
+    else
+        out << "goto tr" << trans->id << ";";
+
+    return out;
+}
+
+/* Emit one _out<id> exit label per state flagged outNeeded. Each label
+ * stores the state id in the current-state variable and jumps to the
+ * common _out label, which is marked as used. */
+std::ostream &IpGotoCodeGen::EXIT_STATES()
+{
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        if ( ! st->outNeeded )
+            continue;
+
+        outLabelUsed = true;
+        out << " _out" << st->id << ": ";
+        out << CS() << " = " << st->id << "; goto _out; \n";
+    }
+    return out;
+}
+
+/* Emit, for every state, a switch case that jumps to the state's label.
+ * writeExec places these inside the _again switch. */
+std::ostream &IpGotoCodeGen::AGAIN_CASES()
+{
+    RedStateList::Iter st = redFsm->stateList;
+    while ( st.lte() ) {
+        out << " case " << st->id;
+        out << ": goto st" << st->id << ";\n";
+        st++;
+    }
+    return out;
+}
+
+/* Emit the EOF switch cases. States sharing an EOF action table are
+ * grouped under consecutive case labels followed by one copy of the action
+ * code and a break. */
+std::ostream &IpGotoCodeGen::FINISH_CASES()
+{
+    bool wroteAny = false;
+
+    /* Pass 1: record on each EOF action table the ids of the states that
+     * use it, allocating the id set on first use. */
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        if ( st->eofAction == 0 )
+            continue;
+
+        if ( st->eofAction->eofRefs == 0 )
+            st->eofAction->eofRefs = new IntSet;
+        st->eofAction->eofRefs->insert( st->id );
+    }
+
+    /* Pass 2: write the grouped cases for every referenced table. */
+    for ( ActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) {
+        if ( act->eofRefs == 0 )
+            continue;
+
+        for ( IntSet::Iter pst = *act->eofRefs; pst.lte(); pst++ )
+            out << " case " << *pst << ": \n";
+
+        /* Something was written, so a line directive is needed at the end
+         * to return to the output file. */
+        wroteAny = true;
+
+        /* The EOF action bodies, in table order. */
+        for ( ActionTable::Iter item = act->key; item.lte(); item++ )
+            ACTION( out, item->value, STATE_ERR_STATE, true );
+        out << "\tbreak;\n";
+    }
+
+    if ( wroteAny )
+        genLineDirective( out );
+    return out;
+}
+
+/* Walk an inline item tree and flag the target state of every goto and
+ * call item as needing a label. Recurses into child lists. */
+void IpGotoCodeGen::setLabelsNeeded( InlineList *inlineList )
+{
+    for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
+        const bool jumpsToState =
+                item->type == InlineItem::Goto ||
+                item->type == InlineItem::Call;
+
+        /* Direct jumps land on the target's label. */
+        if ( jumpsToState )
+            item->targState->labelNeeded = true;
+
+        if ( item->children != 0 )
+            setLabelsNeeded( item->children );
+    }
+}
+
+/* Compute the labelNeeded and outNeeded flags of every state. */
+void IpGotoCodeGen::setLabelsNeeded()
+{
+    /* When the _again label is used, the _again switch is emitted too and
+     * it refers to every state label. Otherwise start with none. */
+    const bool everyLabel = useAgainLabel();
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+        st->labelNeeded = everyLabel;
+
+    if ( ! everyLabel ) {
+        if ( redFsm->errState != 0 && redFsm->anyLmSwitchError() )
+            redFsm->errState->labelNeeded = true;
+
+        /* Walk all transitions and flag only the states that are targeted. */
+        for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) {
+            /* Without an action containing a next statement the target is
+             * jumped to directly, so its label is needed. */
+            if ( trans->action == 0 || !trans->action->anyNextStmt() )
+                trans->targ->labelNeeded = true;
+
+            if ( trans->action == 0 )
+                continue;
+
+            /* Gotos and calls inside action code invoked on characters
+             * (i.e. not out action code) also need their target labels:
+             * walk each action's inline tree. */
+            for ( ActionTable::Iter act = trans->action->key; act.lte(); act++ )
+                setLabelsNeeded( act->value->inlineList );
+        }
+    }
+
+    if ( ! hasEnd ) {
+        if ( redFsm->errState != 0 )
+            redFsm->errState->outNeeded = true;
+
+        /* A state entered by a transition whose action contains a break
+         * needs an out label. */
+        for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) {
+            if ( trans->action != 0 && trans->action->anyBreakStmt() )
+                trans->targ->outNeeded = true;
+        }
+    }
+    else {
+        /* With an end test, every labelled state can exit at the end of
+         * the buffer. */
+        for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+            st->outNeeded = st->labelNeeded;
+    }
+}
+
+/* Write the data section. This generator only calls STATE_IDS here.
+ * NOTE(review): presumably no tables are needed because transitions are
+ * encoded as gotos -- confirm against fsmcodegen. */
+void IpGotoCodeGen::writeData()
+{
+ STATE_IDS();
+}
+
+/* Write the exec section: an enclosing scope, optional local declarations,
+ * the optional _again re-dispatch switch, the main switch containing all
+ * states, the per-state exit labels and, when used, the _out label. */
+void IpGotoCodeGen::writeExec()
+{
+ /* Must set labels immediately before writing because we may depend on the
+ * noend write option. */
+ setLabelsNeeded();
+ outLabelUsed = false;
+
+ out << " {\n";
+
+ /* Local that holds the previous state; written by GOTO_HEADER and read
+ * through CURS. */
+ if ( redFsm->anyRegCurStateRef() )
+ out << " int _ps = 0;\n";
+
+ if ( redFsm->anyConditions() )
+ out << " " << WIDE_ALPH_TYPE() << " _widec;\n";
+
+ /* With an end pointer, leave immediately on empty input. */
+ if ( hasEnd ) {
+ outLabelUsed = true;
+ out <<
+ " if ( " << P() << " == " << PE() << " )\n"
+ " goto _out;\n";
+ }
+
+ /* The _again block re-dispatches on the current state and advances the
+ * input pointer. The first pass jumps over it to _resume. */
+ if ( useAgainLabel() ) {
+ out <<
+ " goto _resume;\n"
+ "\n"
+ "_again:\n"
+ " switch ( " << CS() << " ) {\n";
+ AGAIN_CASES() <<
+ " default: break;\n"
+ " }\n"
+ "\n";
+
+ if ( hasEnd ) {
+ outLabelUsed = true;
+ out <<
+ " if ( ++" << P() << " == " << PE() << " )\n"
+ " goto _out;\n";
+ }
+ else {
+ out <<
+ " " << P() << " += 1;\n";
+ }
+
+ out << "_resume:\n";
+ }
+
+ /* Main switch: STATE_GOTOS writes every state's body inside it. */
+ out <<
+ " switch ( " << CS() << " )\n {\n";
+ STATE_GOTOS();
+ SWITCH_DEFAULT() <<
+ " }\n";
+ /* EXIT_STATES may set outLabelUsed, so it must run before the test below. */
+ EXIT_STATES() <<
+ "\n";
+
+ if ( outLabelUsed )
+ out << " _out: {}\n";
+
+ out <<
+ " }\n";
+}
+
+/* Write the EOF section: a switch on the current state whose cases come
+ * from FINISH_CASES. Nothing is written when the machine has no EOF
+ * actions. */
+void IpGotoCodeGen::writeEOF()
+{
+    if ( ! redFsm->anyEofActions() )
+        return;
+
+    /* Scope and switch header. */
+    out << " {\n";
+    out << " switch ( " << CS() << " ) {\n";
+
+    FINISH_CASES();
+
+    /* Close the switch and the scope on the stream that SWITCH_DEFAULT
+     * hands back. */
+    std::ostream &tail = SWITCH_DEFAULT();
+    tail <<
+        " }\n"
+        " }\n"
+        "\n";
+}
diff --git a/contrib/tools/ragel5/rlgen-cd/ipgotocodegen.h b/contrib/tools/ragel5/rlgen-cd/ipgotocodegen.h
new file mode 100644
index 0000000000..f32678baba
--- /dev/null
+++ b/contrib/tools/ragel5/rlgen-cd/ipgotocodegen.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Erich Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _IPGCODEGEN_H
+#define _IPGCODEGEN_H
+
+#include <iostream>
+#include "gotocodegen.h"
+
+/* Forwards. */
+struct CodeGenData;
+
+/*
+ * class IpGotoCodeGen
+ */
+class IpGotoCodeGen : public GotoCodeGen
+{
+public:
+ /* FsmCodeGen is a virtual base of GotoCodeGen, so it is initialised here
+ * explicitly as well. */
+ IpGotoCodeGen( ostream &out ) : FsmCodeGen(out), GotoCodeGen(out) {}
+
+ /* Emitters writing to the generator's output stream; each returns it. */
+ std::ostream &EXIT_STATES();
+ std::ostream &TRANS_GOTO( RedTransAp *trans, int level );
+ std::ostream &FINISH_CASES();
+ std::ostream &AGAIN_CASES();
+
+ /* Emitters for control statements embedded in actions; output goes to
+ * the caller-supplied stream 'ret'. */
+ void GOTO( ostream &ret, int gotoDest, bool inFinish );
+ void CALL( ostream &ret, int callDest, int targState, bool inFinish );
+ void NEXT( ostream &ret, int nextDest, bool inFinish );
+ void GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish );
+ void NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish );
+ void CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish );
+ void RET( ostream &ret, bool inFinish );
+ void CURS( ostream &ret, bool inFinish );
+ void TARGS( ostream &ret, bool inFinish, int targState );
+ void BREAK( ostream &ret, int targState );
+
+ /* Top-level entry points that write the data, EOF and exec sections. */
+ virtual void writeData();
+ virtual void writeEOF();
+ virtual void writeExec();
+
+protected:
+ /* True when the generated code needs the _again label and switch. */
+ bool useAgainLabel();
+
+ /* IN_TRANS_ACTIONS writes the labelled actions of a state's incoming
+ * transitions. GOTO_HEADER is called from GotoCodeGen::STATE_GOTOS just
+ * before writing the gotos for each state; STATE_GOTO_ERROR writes the
+ * error state's body. */
+ bool IN_TRANS_ACTIONS( RedStateAp *state );
+ void GOTO_HEADER( RedStateAp *state );
+ void STATE_GOTO_ERROR();
+
+ /* Set up labelNeeded flag for each state. */
+ void setLabelsNeeded( InlineList *inlineList );
+ void setLabelsNeeded();
+};
+
+
+/*
+ * class CIpGotoCodeGen
+ */
+/* In-place goto generator paired with the C code generator base. */
+struct CIpGotoCodeGen : public IpGotoCodeGen, public CCodeGen
+{
+    /* The shared virtual base FsmCodeGen is initialised here as well. */
+    CIpGotoCodeGen( ostream &out )
+        : FsmCodeGen(out), IpGotoCodeGen(out), CCodeGen(out)
+    {
+    }
+};
+
+/*
+ * class DIpGotoCodeGen
+ */
+/* In-place goto generator paired with the D code generator base. */
+struct DIpGotoCodeGen : public IpGotoCodeGen, public DCodeGen
+{
+    /* The shared virtual base FsmCodeGen is initialised here as well. */
+    DIpGotoCodeGen( ostream &out )
+        : FsmCodeGen(out), IpGotoCodeGen(out), DCodeGen(out)
+    {
+    }
+};
+
+
+#endif /* _IPGCODEGEN_H */
diff --git a/contrib/tools/ragel5/rlgen-cd/main.cpp b/contrib/tools/ragel5/rlgen-cd/main.cpp
new file mode 100644
index 0000000000..cabe4bd97d
--- /dev/null
+++ b/contrib/tools/ragel5/rlgen-cd/main.cpp
@@ -0,0 +1,394 @@
+/*
+ * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <iostream>
+#include <fstream>
+#ifndef _WIN32
+# include <unistd.h>
+#endif
+
+#include "common.h"
+#include "rlgen-cd.h"
+#include "xmlparse.h"
+#include "pcheck.h"
+#include "vector.h"
+#include "version.h"
+
+/* Code generators. */
+#include "tabcodegen.h"
+#include "ftabcodegen.h"
+#include "flatcodegen.h"
+#include "fflatcodegen.h"
+#include "gotocodegen.h"
+#include "fgotocodegen.h"
+#include "ipgotocodegen.h"
+#include "splitcodegen.h"
+
+using std::istream;
+using std::ifstream;
+using std::ostream;
+using std::ios;
+using std::cin;
+using std::cout;
+using std::cerr;
+using std::endl;
+
+/* Output code style, selected with -T, -F, -G or -P; table driven by
+ * default. */
+CodeStyleEnum codeStyle = GenTables;
+
+/* Io globals. */
+/* Input stream: the opened input file or standard input (set in main). */
+istream *inStream = 0;
+/* Output stream and its filter; both are created in openOutput. */
+ostream *outStream = 0;
+output_filter *outFilter = 0;
+/* Output file name: from -o, or derived from the input name in openOutput. */
+char *outputFileName = 0;
+
+/* Graphviz dot file generation. */
+bool graphvizDone = false;
+
+/* Number of partitions for the split code style (-P<N>). */
+int numSplitPartitions = 0;
+/* Set by -l to inhibit writing of #line directives. */
+bool noLineDirectives = false;
+/* NOTE(review): no option in this file sets this flag -- confirm it is
+ * still used elsewhere. */
+bool printPrintables = false;
+
+/* Write the command line synopsis and the option summary to standard
+ * output, one section per statement. */
+void usage()
+{
+    /* Synopsis. */
+    cout << "usage: " PROGNAME " [options] file\n";
+
+    /* General options. */
+    cout <<
+"general:\n"
+" -h, -H, -?, --help Print this usage and exit\n"
+" -v, --version Print version information and exit\n"
+" -o <file> Write output to <file>\n";
+
+    /* Code generation options. */
+    cout <<
+"code generation options:\n"
+" -l Inhibit writing of #line directives\n";
+
+    /* Generated code styles. */
+    cout <<
+"generated code style:\n"
+" -T0 Table driven FSM (default)\n"
+" -T1 Faster table driven FSM\n"
+" -F0 Flat table driven FSM\n"
+" -F1 Faster flat table-driven FSM\n"
+" -G0 Goto-driven FSM\n"
+" -G1 Faster goto-driven FSM\n"
+" -G2 Really fast goto-driven FSM\n"
+" -P<N> N-Way Split really fast goto-driven FSM\n";
+}
+
+/* Write the program banner, version with publication date, and copyright
+ * line to standard output. Each line is flushed with endl. */
+void version()
+{
+    cout << "Ragel Code Generator for C, C++, Objective-C and D" << endl;
+    cout << "Version " VERSION << ", " PUBDATE << endl;
+    cout << "Copyright (c) 2001-2007 by Adrian Thurston" << endl;
+}
+
+/* Number of errors reported so far; main exits with status 1 when this is
+ * non-zero. */
+int gblErrorCount = 0;
+
+/* Count one error, write the program-name prefix to standard error and
+ * return that stream so the caller can append the message. */
+ostream &error()
+{
+    ++gblErrorCount;
+    return cerr << PROGNAME ": ";
+}
+
+/*
+ * Callbacks invoked by the XML data parser.
+ */
+
+/* Invoked by the parser when the root element is opened.
+ *
+ * Checks the host language, picks a default output file name when none was
+ * given with -o, and opens the output stream.
+ *
+ * inputFile: name of the input file; used to derive the default output
+ *            name and to guard against overwriting the input.
+ *
+ * Returns the stream to write generated code to (also stored in outStream).
+ * Exits with status 1 when the host language is not C or D, when the
+ * output file would be the input file, or when the output file cannot be
+ * opened. */
+ostream *openOutput( char *inputFile )
+{
+    if ( hostLangType != CCode && hostLangType != DCode ) {
+        error() << "this code generator is for C and D only" << endl;
+        exit(1);
+    }
+
+    /* If the output format is code and no output file name is given, then
+     * make a default. */
+    if ( outputFileName == 0 ) {
+        char *ext = findFileExtension( inputFile );
+        if ( ext != 0 && strcmp( ext, ".rh" ) == 0 )
+            outputFileName = fileNameFromStem( inputFile, ".h" );
+        else {
+            const char *defExtension = 0;
+            switch ( hostLangType ) {
+                case CCode: defExtension = ".c"; break;
+                case DCode: defExtension = ".d"; break;
+                default: break;
+            }
+            outputFileName = fileNameFromStem( inputFile, defExtension );
+        }
+    }
+
+    /* Make sure we are not writing to the same file as the input file.
+     * This must be fatal right here: the open below uses ios::trunc and
+     * would wipe the input, and main unlinks the output file whenever
+     * errors were counted, which would then delete it. */
+    if ( outputFileName != 0 && strcmp( inputFile, outputFileName ) == 0 ) {
+        error() << "output file \"" << outputFileName <<
+                "\" is the same as the input file" << endl;
+        exit(1);
+    }
+
+    if ( outputFileName != 0 ) {
+        /* Create the filter on the output and open it. */
+        outFilter = new output_filter( outputFileName );
+        outFilter->open( outputFileName, ios::out|ios::trunc );
+        if ( !outFilter->is_open() ) {
+            error() << "error opening " << outputFileName << " for writing" << endl;
+            exit(1);
+        }
+
+        /* Open the output stream, attaching it to the filter. */
+        outStream = new ostream( outFilter );
+    }
+    else {
+        /* Writing out to std out. */
+        outStream = &cout;
+    }
+    return outStream;
+}
+
+/* Invoked by the parser when a ragel definition is opened.
+ *
+ * Creates the code generator matching the host language (hostLangType) and
+ * the selected code style (codeStyle), then fills in its bookkeeping
+ * fields.
+ *
+ * sourceFileName: stored on the generator as sourceFileName.
+ * fsmName:        stored on the generator as fsmName.
+ * out:            stream the generator writes to.
+ * wantComplete:   stored on the generator as wantComplete.
+ *
+ * Returns the new generator; never null. Exits with status 1 when no
+ * generator exists for the language/style combination. */
+CodeGenData *makeCodeGen( char *sourceFileName, char *fsmName,
+        ostream &out, bool wantComplete )
+{
+    CodeGenData *codeGen = 0;
+    switch ( hostLangType ) {
+    case CCode:
+        switch ( codeStyle ) {
+        case GenTables:
+            codeGen = new CTabCodeGen(out);
+            break;
+        case GenFTables:
+            codeGen = new CFTabCodeGen(out);
+            break;
+        case GenFlat:
+            codeGen = new CFlatCodeGen(out);
+            break;
+        case GenFFlat:
+            codeGen = new CFFlatCodeGen(out);
+            break;
+        case GenGoto:
+            codeGen = new CGotoCodeGen(out);
+            break;
+        case GenFGoto:
+            codeGen = new CFGotoCodeGen(out);
+            break;
+        case GenIpGoto:
+            codeGen = new CIpGotoCodeGen(out);
+            break;
+        case GenSplit:
+            codeGen = new CSplitCodeGen(out);
+            break;
+        }
+        break;
+
+    case DCode:
+        switch ( codeStyle ) {
+        case GenTables:
+            codeGen = new DTabCodeGen(out);
+            break;
+        case GenFTables:
+            codeGen = new DFTabCodeGen(out);
+            break;
+        case GenFlat:
+            codeGen = new DFlatCodeGen(out);
+            break;
+        case GenFFlat:
+            codeGen = new DFFlatCodeGen(out);
+            break;
+        case GenGoto:
+            codeGen = new DGotoCodeGen(out);
+            break;
+        case GenFGoto:
+            codeGen = new DFGotoCodeGen(out);
+            break;
+        case GenIpGoto:
+            codeGen = new DIpGotoCodeGen(out);
+            break;
+        case GenSplit:
+            codeGen = new DSplitCodeGen(out);
+            break;
+        }
+        break;
+
+    default: break;
+    }
+
+    /* Neither switch assigns a generator for an unsupported host language
+     * or an out-of-range code style. Stop here rather than dereference a
+     * null pointer below. */
+    if ( codeGen == 0 ) {
+        error() << "no code generator for the requested language and code style" << endl;
+        exit(1);
+    }
+
+    codeGen->sourceFileName = sourceFileName;
+    codeGen->fsmName = fsmName;
+    codeGen->wantComplete = wantComplete;
+
+    return codeGen;
+}
+
+
+
+/* Main: process the command line, open the input and run xml_parse on it.
+ * Returns 0 on success. Exits with status 1 if any error was reported, in
+ * which case an output file that was named or derived is removed. */
+int main(int argc, char **argv)
+{
+ /* Option spec: a trailing ':' marks an option that takes an argument.
+ * The leading "-:" routes long options (--help, --version) to case '-'. */
+ ParamCheck pc("-:Hh?vlo:T:F:G:P:", argc, argv);
+ const char *xmlInputFileName = 0;
+
+ while ( pc.check() ) {
+ switch ( pc.state ) {
+ case ParamCheck::match:
+ switch ( pc.parameter ) {
+ /* Output. */
+ case 'o':
+ if ( *pc.parameterArg == 0 )
+ error() << "a zero length output file name was given" << endl;
+ else if ( outputFileName != 0 )
+ error() << "more than one output file name was given" << endl;
+ else {
+ /* Ok, remember the output file name. */
+ outputFileName = pc.parameterArg;
+ }
+ break;
+
+ case 'l':
+ noLineDirectives = true;
+ break;
+
+ /* Code style. */
+ case 'T':
+ if ( pc.parameterArg[0] == '0' )
+ codeStyle = GenTables;
+ else if ( pc.parameterArg[0] == '1' )
+ codeStyle = GenFTables;
+ else {
+ error() << "-T" << pc.parameterArg[0] <<
+ " is an invalid argument" << endl;
+ exit(1);
+ }
+ break;
+ case 'F':
+ if ( pc.parameterArg[0] == '0' )
+ codeStyle = GenFlat;
+ else if ( pc.parameterArg[0] == '1' )
+ codeStyle = GenFFlat;
+ else {
+ error() << "-F" << pc.parameterArg[0] <<
+ " is an invalid argument" << endl;
+ exit(1);
+ }
+ break;
+ case 'G':
+ if ( pc.parameterArg[0] == '0' )
+ codeStyle = GenGoto;
+ else if ( pc.parameterArg[0] == '1' )
+ codeStyle = GenFGoto;
+ else if ( pc.parameterArg[0] == '2' )
+ codeStyle = GenIpGoto;
+ else {
+ error() << "-G" << pc.parameterArg[0] <<
+ " is an invalid argument" << endl;
+ exit(1);
+ }
+ break;
+ case 'P':
+ /* The partition count is taken as-is from atoi; it is not
+ * validated here. */
+ codeStyle = GenSplit;
+ numSplitPartitions = atoi( pc.parameterArg );
+ break;
+
+ /* Version and help. */
+ case 'v':
+ version();
+ exit(0);
+ case 'H': case 'h': case '?':
+ usage();
+ exit(0);
+ case '-':
+ if ( strcasecmp(pc.parameterArg, "help") == 0 ) {
+ usage();
+ exit(0);
+ }
+ else if ( strcasecmp(pc.parameterArg, "version") == 0 ) {
+ version();
+ exit(0);
+ }
+ else {
+ error() << "--" << pc.parameterArg <<
+ " is an invalid argument" << endl;
+ break;
+ }
+ }
+ break;
+
+ case ParamCheck::invalid:
+ error() << "-" << pc.parameter << " is an invalid argument" << endl;
+ break;
+
+ case ParamCheck::noparam:
+ /* A non-option argument is the input file name. */
+ if ( *pc.curArg == 0 )
+ error() << "a zero length input file name was given" << endl;
+ else if ( xmlInputFileName != 0 )
+ error() << "more than one input file name was given" << endl;
+ else {
+ /* OK, Remember the filename. */
+ xmlInputFileName = pc.curArg;
+ }
+ break;
+ }
+ }
+
+ /* Bail on above errors. */
+ if ( gblErrorCount > 0 )
+ exit(1);
+
+ /* Open the input file for reading. */
+ if ( xmlInputFileName != 0 ) {
+ /* Open the input file for reading. */
+ ifstream *inFile = new ifstream( xmlInputFileName );
+ inStream = inFile;
+ if ( ! inFile->is_open() )
+ error() << "could not open " << xmlInputFileName << " for reading" << endl;
+ }
+ else {
+ /* No input file given: read standard input. */
+ xmlInputFileName = "<stdin>";
+ inStream = &cin;
+ }
+
+ /* Bail on above errors. */
+ if ( gblErrorCount > 0 )
+ exit(1);
+
+ bool wantComplete = true;
+ bool outputActive = true;
+
+ /* Parse the input! */
+ xml_parse( *inStream, xmlInputFileName, outputActive, wantComplete );
+
+ /* If writing to a file, delete the ostream, causing it to flush.
+ * Standard out is flushed automatically. */
+ if ( outputFileName != 0 ) {
+ delete outStream;
+ delete outFilter;
+ }
+
+ /* Finished, final check for errors.. */
+ if ( gblErrorCount > 0 ) {
+ /* If we opened an output file, remove it. */
+ if ( outputFileName != 0 )
+ unlink( outputFileName );
+ exit(1);
+ }
+ return 0;
+}
diff --git a/contrib/tools/ragel5/rlgen-cd/rlgen-cd.h b/contrib/tools/ragel5/rlgen-cd/rlgen-cd.h
new file mode 100644
index 0000000000..93acd99bae
--- /dev/null
+++ b/contrib/tools/ragel5/rlgen-cd/rlgen-cd.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _RLCODEGEN_H
+#define _RLCODEGEN_H
+
+#include <stdio.h>
+#include <iostream>
+#include "avltree.h"
+#include "vector.h"
+#include "config.h"
+
+#define PROGNAME "rlgen-cd"
+
+/* Target output style. The flags named below are the command line options
+ * that select each style in main. */
+enum CodeStyleEnum
+{
+ /* -T0: table driven FSM (the default). */
+ GenTables,
+ /* -T1: faster table driven FSM. */
+ GenFTables,
+ /* -F0: flat table driven FSM. */
+ GenFlat,
+ /* -F1: faster flat table driven FSM. */
+ GenFFlat,
+ /* -G0: goto driven FSM. */
+ GenGoto,
+ /* -G1: faster goto driven FSM. */
+ GenFGoto,
+ /* -G2: really fast goto driven FSM. */
+ GenIpGoto,
+ /* -P<N>: N-way split really fast goto driven FSM. */
+ GenSplit
+};
+
+extern CodeStyleEnum codeStyle;
+
+
+/* IO filenames and stream. */
+extern bool graphvizDone;
+
+extern int gblErrorCount;
+
+/* Options. */
+extern int numSplitPartitions;
+extern bool noLineDirectives;
+
+std::ostream &error();
+
+#endif /* _RLCODEGEN_H */
diff --git a/contrib/tools/ragel5/rlgen-cd/splitcodegen.cpp b/contrib/tools/ragel5/rlgen-cd/splitcodegen.cpp
new file mode 100644
index 0000000000..d703b37eea
--- /dev/null
+++ b/contrib/tools/ragel5/rlgen-cd/splitcodegen.cpp
@@ -0,0 +1,521 @@
+/*
+ * Copyright 2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#include "rlgen-cd.h"
+#include "splitcodegen.h"
+#include "gendata.h"
+#include <assert.h>
+
+using std::ostream;
+using std::ios;
+using std::endl;
+
+/* Write the goto statement that implements one transition, indented by
+ * `level`. A target inside the partition currently being written is reached
+ * through the "tr"/"st" labels. A target in a different partition is reached
+ * through the "ptr"/"pst" labels, and the transition (when it has an action)
+ * or the target state (when it has none) is flagged as a partition boundary.
+ * Returns the output stream. */
+std::ostream &SplitCodeGen::TRANS_GOTO( RedTransAp *trans, int level )
+{
+    const bool samePartition = ( trans->targ->partition == currentPartition );
+    const bool hasAction = ( trans->action != 0 );
+
+    out << TABS(level);
+
+    if ( samePartition ) {
+        /* With an action: jump to the transition label, which runs the
+         * action and then enters the state. Without: enter the state. */
+        if ( hasAction )
+            out << "goto tr" << trans->id << ";";
+        else
+            out << "goto st" << trans->targ->id << ";";
+    }
+    else if ( hasAction ) {
+        /* Cross-partition jump through the transition. */
+        out << "goto ptr" << trans->id << ";";
+        trans->partitionBoundary = true;
+    }
+    else {
+        /* Cross-partition jump straight to the target state. */
+        out << "goto pst" << trans->targ->id << ";";
+        trans->targ->partitionBoundary = true;
+    }
+
+    return out;
+}
+
+/* Called from before writing the gotos for each state. Emits, in this order:
+ * whatever IN_TRANS_ACTIONS writes, the "st<id>:" label (only when the state
+ * needs a label), the to-state actions, the buffer position advance (only
+ * when the state needs a label), the "case <id>:" label, the from-state
+ * actions, a line directive if any action code was written, and the "_ps"
+ * assignment when the state references the current state.
+ * The stateInPartition parameter is not read by this implementation. */
+void SplitCodeGen::GOTO_HEADER( RedStateAp *state, bool stateInPartition )
+{
+ /* NOTE(review): IN_TRANS_ACTIONS is defined elsewhere; its result is used
+ * here as "action code was written" -- confirm at its definition. */
+ bool anyWritten = IN_TRANS_ACTIONS( state );
+
+ if ( state->labelNeeded )
+ out << "st" << state->id << ":\n";
+
+ if ( state->toStateAction != 0 ) {
+ /* Remember that we wrote an action. Write every action in the list. */
+ anyWritten = true;
+ for ( ActionTable::Iter item = state->toStateAction->key; item.lte(); item++ )
+ ACTION( out, item->value, state->id, false );
+ }
+
+ /* Advance and test buffer pos. With an end pointer the generated code
+ * leaves through this state's _out<id> label when the end is reached. */
+ if ( state->labelNeeded ) {
+ if ( hasEnd ) {
+ out <<
+ " if ( ++" << P() << " == " << PE() << " )\n"
+ " goto _out" << state->id << ";\n";
+ }
+ else {
+ out <<
+ " " << P() << " += 1;\n";
+ }
+ }
+
+ /* Give the state a switch case. */
+ out << "case " << state->id << ":\n";
+
+ if ( state->fromStateAction != 0 ) {
+ /* Remember that we wrote an action. Write every action in the list. */
+ anyWritten = true;
+ for ( ActionTable::Iter item = state->fromStateAction->key; item.lte(); item++ )
+ ACTION( out, item->value, state->id, false );
+ }
+
+ /* Action code was emitted above, so write a line directive after it. */
+ if ( anyWritten )
+ genLineDirective( out );
+
+ /* Record the prev state if necessary. */
+ if ( state->anyRegCurStateRef() )
+ out << " _ps = " << state->id << ";\n";
+}
+
+std::ostream &SplitCodeGen::STATE_GOTOS( int partition )
+{
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ if ( st->partition == partition ) {
+ if ( st == redFsm->errState )
+ STATE_GOTO_ERROR();
+ else {
+ /* We call into the base of the goto which calls back into us
+ * using virtual functions. Set the current partition rather
+ * than coding parameter passing throughout. */
+ currentPartition = partition;
+
+ /* Writing code above state gotos. */
+ GOTO_HEADER( st, st->partition == partition );
+
+ if ( st->stateCondVect.length() > 0 ) {
+ out << " _widec = " << GET_KEY() << ";\n";
+ emitCondBSearch( st, 1, 0, st->stateCondVect.length() - 1 );
+ }
+
+ /* Try singles. */
+ if ( st->outSingle.length() > 0 )
+ emitSingleSwitch( st );
+
+ /* Default case is to binary search for the ranges, if that fails then */
+ if ( st->outRange.length() > 0 )
+ emitRangeBSearch( st, 1, 0, st->outRange.length() - 1 );
+
+ /* Write the default transition. */
+ TRANS_GOTO( st->defTrans, 1 ) << "\n";
+ }
+ }
+ }
+ return out;
+}
+
+
+/* Write the labels used to leave the partition being generated: a "ptr<id>"
+ * label for every transition flagged as a partition boundary, followed by a
+ * "pst<id>" label for every state flagged as a partition boundary. Each pst
+ * label stores the state id in the current state variable, runs the
+ * to-state actions and jumps to _pt_out. Returns the output stream.
+ * The partition parameter is not read; selection relies solely on the
+ * partitionBoundary flags. */
+std::ostream &SplitCodeGen::PART_TRANS( int partition )
+{
+ for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) {
+ if ( trans->partitionBoundary ) {
+ out <<
+ "ptr" << trans->id << ":\n";
+
+ if ( trans->action != 0 ) {
+ /* If the action contains a next, then we must preload the current
+ * state since the action may or may not set it. */
+ if ( trans->action->anyNextStmt() )
+ out << " " << CS() << " = " << trans->targ->id << ";\n";
+
+ /* Write each action in the list. */
+ for ( ActionTable::Iter item = trans->action->key; item.lte(); item++ )
+ ACTION( out, item->value, trans->targ->id, false );
+ }
+
+ /* Continue at the target's pst label; flag the target so that the
+ * loop below writes that label. */
+ out <<
+ " goto pst" << trans->targ->id << ";\n";
+ trans->targ->partitionBoundary = true;
+ }
+ }
+
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ if ( st->partitionBoundary ) {
+ out <<
+ " pst" << st->id << ":\n"
+ " " << CS() << " = " << st->id << ";\n";
+
+ if ( st->toStateAction != 0 ) {
+ /* Write every to-state action in the list, then a line directive. */
+ for ( ActionTable::Iter item = st->toStateAction->key; item.lte(); item++ )
+ ACTION( out, item->value, st->id, false );
+ genLineDirective( out );
+ }
+
+ /* Record that the _pt_out label must be written by PARTITION. */
+ ptOutLabelUsed = true;
+ out << " goto _pt_out; \n";
+ }
+ }
+ return out;
+}
+
+/* Write an "_out<id>" exit label for every state of `partition` that has
+ * outNeeded set. Each label stores the state id in the current state
+ * variable and jumps to the shared _out label, which is marked as used.
+ * Returns the output stream. */
+std::ostream &SplitCodeGen::EXIT_STATES( int partition )
+{
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        if ( st->partition != partition || !st->outNeeded )
+            continue;
+
+        outLabelUsed = true;
+        out << " _out" << st->id << ": " << CS() << " = " <<
+                st->id << "; goto _out; \n";
+    }
+    return out;
+}
+
+
+/* Write the body of the function for one partition: local declarations, the
+ * optional _again/_resume prologue, the switch over the current state with
+ * the code of this partition's states, the partition-exit labels, the
+ * per-state exit labels and, when they were used, the _out (return 0) and
+ * _pt_out (return 1) epilogues. Returns the output stream. */
+std::ostream &SplitCodeGen::PARTITION( int partition )
+{
+ outLabelUsed = false;
+ ptOutLabelUsed = false;
+
+ /* Initialize the partition boundaries, which get set during the writing
+ * of states. After the state writing, PART_TRANS writes a label for
+ * everything that was flagged. */
+ for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ )
+ trans->partitionBoundary = false;
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+ st->partitionBoundary = false;
+
+ /* Local copies of the caller's position and end pointers. */
+ out << " " << ALPH_TYPE() << " *p = *_pp, *pe = *_ppe;\n";
+
+ if ( redFsm->anyRegCurStateRef() )
+ out << " int _ps = 0;\n";
+
+ if ( redFsm->anyConditions() )
+ out << " " << WIDE_ALPH_TYPE() << " _widec;\n";
+
+ if ( useAgainLabel() ) {
+ out <<
+ " goto _resume;\n"
+ "\n"
+ "_again:\n"
+ " switch ( " << CS() << " ) {\n";
+ AGAIN_CASES() <<
+ " default: break;\n"
+ " }\n"
+ "\n";
+
+
+ if ( hasEnd ) {
+ outLabelUsed = true;
+ out <<
+ " if ( ++" << P() << " == " << PE() << " )\n"
+ " goto _out;\n";
+
+ }
+ else {
+ out <<
+ " " << P() << " += 1;\n";
+ }
+
+ out <<
+ "_resume:\n";
+ }
+
+ /* The order below matters: STATE_GOTOS sets the partitionBoundary flags
+ * that PART_TRANS reads, and all three calls may set the label-used flags
+ * tested afterwards. */
+ out <<
+ " switch ( " << CS() << " )\n {\n";
+ STATE_GOTOS( partition );
+ SWITCH_DEFAULT() <<
+ " }\n";
+ PART_TRANS( partition );
+ EXIT_STATES( partition );
+
+ /* Exit used when processing stops: write back the pointers, return 0. */
+ if ( outLabelUsed ) {
+ out <<
+ "\n"
+ " _out:\n"
+ " *_pp = p;\n"
+ " *_ppe = pe;\n"
+ " return 0;\n";
+ }
+
+ /* Exit used when control moves to another partition: write back the
+ * pointers, return 1. */
+ if ( ptOutLabelUsed ) {
+ out <<
+ "\n"
+ " _pt_out:\n"
+ " *_pp = p;\n"
+ " *_ppe = pe;\n"
+ " return 1;\n";
+ }
+
+ return out;
+}
+
+/* Write the contents of the state-to-partition table: one partition number
+ * per state, ordered by state id, comma separated, with a line break after
+ * every IALL separators. Returns the output stream. */
+std::ostream &SplitCodeGen::PART_MAP()
+{
+    const int numStates = redFsm->stateList.length();
+
+    /* The state list is not walked in id order here, so gather the
+     * partition numbers into a table indexed by state id first. */
+    int *partitionOf = new int[numStates];
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+        partitionOf[st->id] = st->partition;
+
+    out << "\t";
+    for ( int id = 0; id < numStates; id++ ) {
+        out << partitionOf[id];
+
+        /* No separator after the final entry. */
+        if ( id == numStates - 1 )
+            continue;
+
+        out << ", ";
+        /* id + 1 separators have been written at this point. */
+        if ( ( id + 1 ) % IALL == 0 )
+            out << "\n\t";
+    }
+
+    delete[] partitionOf;
+    return out;
+}
+
+/* Write the static data for the split style: the start state constant, the
+ * optional first-final and error state constants, the state-to-partition
+ * array and a prototype for each partition function. */
+void SplitCodeGen::writeData()
+{
+    out << "static const int " << START() << " = " << START_STATE_ID() << ";\n\n";
+
+    if ( writeFirstFinal )
+        out << "static const int " << FIRST_FINAL() << " = " << FIRST_FINAL_STATE() << ";\n\n";
+
+    if ( writeErr )
+        out << "static const int " << ERROR() << " = " << ERROR_STATE() << ";\n\n";
+
+    /* The partition map array. */
+    OPEN_ARRAY( ARRAY_TYPE(numSplitPartitions), PM() );
+    PART_MAP();
+    CLOSE_ARRAY() << "\n";
+
+    /* One prototype per partition function. */
+    const int numParts = redFsm->nParts;
+    for ( int part = 0; part < numParts; part++ ) {
+        out << "int partition" << part << "( " << ALPH_TYPE() << " **_pp, ";
+        out << ALPH_TYPE() << " **_ppe, struct " << FSM_NAME() << " *fsm );\n";
+    }
+    out << "\n";
+}
+
+/* Write every partition function into its own C source file.
+ *
+ * For each partition p, a file name is built from the stem of sourceFileName
+ * and a suffix of the form "_<p>.c", where <p> is zero padded to the number
+ * of decimal digits of the highest partition number. The output stream is
+ * redirected into that file while "int partition<p>( ... )" is written and
+ * is restored afterwards. With a single partition the computed width is 0
+ * and, by the printf rules for a zero value with zero precision, no digits
+ * are printed at all.
+ *
+ * Exits the process with status 1 when a file cannot be opened. Returns the
+ * output stream. */
+std::ostream &SplitCodeGen::ALL_PARTITIONS()
+{
+    /* Compute the format string: count the decimal digits of the highest
+     * partition number. */
+    int width = 0, high = redFsm->nParts - 1;
+    while ( high > 0 ) {
+        width++;
+        high /= 10;
+    }
+    /* The width is patched into the format as a single digit character. */
+    assert( width <= 8 );
+    char suffFormat[] = "_%6.6d.c";
+    suffFormat[2] = suffFormat[4] = ( '0' + width );
+
+    for ( int p = 0; p < redFsm->nParts; p++ ) {
+        /* Worst case allowed by the assert is '_' + 8 digits + ".c" + NUL,
+         * i.e. 12 bytes. A 10 byte buffer is too small once the width
+         * reaches 7, so use a larger one and bound the write. */
+        char suffix[16];
+        snprintf( suffix, sizeof(suffix), suffFormat, p );
+        char *fn = fileNameFromStem( sourceFileName, suffix );
+        char *include = fileNameFromStem( sourceFileName, ".h" );
+
+        /* NOTE(review): fn, include and partFilter are not released in this
+         * function -- confirm their ownership before adding cleanup. */
+
+        /* Create the filter on the output and open it. */
+        output_filter *partFilter = new output_filter( fn );
+        partFilter->open( fn, ios::out|ios::trunc );
+        if ( !partFilter->is_open() ) {
+            error() << "error opening " << fn << " for writing" << endl;
+            exit(1);
+        }
+
+        /* Attach the new file to the output stream. */
+        std::streambuf *prev_rdbuf = out.rdbuf( partFilter );
+
+        out <<
+            "#include \"" << include << "\"\n"
+            "int partition" << p << "( " << ALPH_TYPE() << " **_pp, " << ALPH_TYPE() <<
+            " **_ppe, struct " << FSM_NAME() << " *fsm )\n"
+            "{\n";
+        PARTITION( p ) <<
+            "}\n\n";
+        out.flush();
+
+        /* Fix the output stream. */
+        out.rdbuf( prev_rdbuf );
+    }
+    return out;
+}
+
+
+/* Write the execution block for the split style. The generated code looks up
+ * the partition of the current state in the partition map, calls that
+ * partition's function and repeats through _reenter for as long as the
+ * function returns non-zero. Afterwards the partition files themselves are
+ * written by ALL_PARTITIONS(). */
+void SplitCodeGen::writeExec()
+{
+ /* Must set labels immediately before writing because we may depend on the
+ * noend write option. */
+ setLabelsNeeded();
+ out <<
+ " {\n"
+ " int _stat = 0;\n";
+
+ /* Nothing to do when the buffer is already exhausted. */
+ if ( hasEnd ) {
+ out <<
+ " if ( " << P() << " == " << PE() << " )\n"
+ " goto _out;\n";
+ }
+
+ out << " goto _resume;\n";
+
+ /* In this reentry, to-state actions have already been executed on the
+ * partition-switch exit from the last partition. */
+ out << "_reenter:\n";
+
+ /* Advance the position before dispatching again. */
+ if ( hasEnd ) {
+ out <<
+ " if ( ++" << P() << " == " << PE() << " )\n"
+ " goto _out;\n";
+ }
+ else {
+ out <<
+ " " << P() << " += 1;\n";
+ }
+
+ out << "_resume:\n";
+
+ /* Dispatch on the partition of the current state. */
+ out <<
+ " switch ( " << PM() << "[" << CS() << "] ) {\n";
+ for ( int p = 0; p < redFsm->nParts; p++ ) {
+ out <<
+ " case " << p << ":\n"
+ " _stat = partition" << p << "( &p, &pe, fsm );\n"
+ " break;\n";
+ }
+ out <<
+ " }\n"
+ " if ( _stat )\n"
+ " goto _reenter;\n";
+
+ /* The _out label is only referenced by the hasEnd code above. */
+ if ( hasEnd )
+ out << " _out: {}\n";
+
+ out <<
+ " }\n";
+
+ ALL_PARTITIONS();
+}
+
+/* Walk an inline action tree and mark the target state of every goto or call
+ * item as needing a label, but only when the target lies in the same
+ * partition as fromState. Child lists are walked recursively. */
+void SplitCodeGen::setLabelsNeeded( RedStateAp *fromState, InlineList *inlineList )
+{
+    for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
+        const bool jumps = ( item->type == InlineItem::Goto ||
+                item->type == InlineItem::Call );
+
+        /* In split code gen a label is only needed when the jump stays
+         * inside one partition. */
+        if ( jumps && fromState->partition == item->targState->partition )
+            item->targState->labelNeeded = true;
+
+        if ( item->children != 0 )
+            setLabelsNeeded( fromState, item->children );
+    }
+}
+
+/* Mark the labels required by one transition leaving fromState. Labels are
+ * only marked for transitions that stay inside fromState's partition. The
+ * transition's action code, if any, is then scanned for gotos and calls. */
+void SplitCodeGen::setLabelsNeeded( RedStateAp *fromState, RedTransAp *trans )
+{
+    const bool hasAction = ( trans->action != 0 );
+
+    /* Transitions across partitions do not need labels. */
+    if ( fromState->partition == trans->targ->partition ) {
+        trans->labelNeeded = true;
+
+        /* The target's label is needed unless an action with a next
+         * statement takes over choosing the target. */
+        if ( !( hasAction && trans->action->anyNextStmt() ) )
+            trans->targ->labelNeeded = true;
+    }
+
+    if ( !hasAction )
+        return;
+
+    /* Gotos and calls inside action code run on characters (ie, not from
+     * out action code) need labels too: walk each action's tree. */
+    for ( ActionTable::Iter act = trans->action->key; act.lte(); act++ )
+        setLabelsNeeded( fromState, act->value->inlineList );
+}
+
+/* Set up labelNeeded flag for each state, and from it the outNeeded flag.
+ * With the _again label every state gets a label; otherwise labels are
+ * derived from the transitions. outNeeded then follows labelNeeded when an
+ * end pointer is in use, and is otherwise set for the error state and for
+ * the targets of transitions whose action contains a break. */
+void SplitCodeGen::setLabelsNeeded()
+{
+ /* If we use the _again label, then we write the _again switch, which uses
+ * all labels. */
+ if ( useAgainLabel() ) {
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+ st->labelNeeded = true;
+ }
+ else {
+ /* Do not use all labels by default, init all labelNeeded vars to false. */
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+ st->labelNeeded = false;
+ for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ )
+ trans->labelNeeded = false;
+
+ if ( redFsm->errState != 0 && redFsm->anyLmSwitchError() )
+ redFsm->errState->labelNeeded = true;
+
+ /* Walk all transitions and set only those that have targs. */
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ for ( RedTransList::Iter tel = st->outRange; tel.lte(); tel++ )
+ setLabelsNeeded( st, tel->value );
+
+ for ( RedTransList::Iter tel = st->outSingle; tel.lte(); tel++ )
+ setLabelsNeeded( st, tel->value );
+
+ if ( st->defTrans != 0 )
+ setLabelsNeeded( st, st->defTrans );
+ }
+ }
+
+ if ( hasEnd ) {
+ /* Every labelled state tests for the end of the buffer and therefore
+ * needs its own exit label. */
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+ st->outNeeded = st->labelNeeded;
+ }
+ else {
+ /* NOTE(review): outNeeded is not reset for the remaining states on
+ * this path -- confirm it is initialised elsewhere. */
+ if ( redFsm->errState != 0 )
+ redFsm->errState->outNeeded = true;
+
+ for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) {
+ /* Any state with a transition in that has a break will need an
+ * out label. */
+ if ( trans->action != 0 && trans->action->anyBreakStmt() )
+ trans->targ->outNeeded = true;
+ }
+ }
+}
+
diff --git a/contrib/tools/ragel5/rlgen-cd/splitcodegen.h b/contrib/tools/ragel5/rlgen-cd/splitcodegen.h
new file mode 100644
index 0000000000..82fc37150e
--- /dev/null
+++ b/contrib/tools/ragel5/rlgen-cd/splitcodegen.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _SPLITCODEGEN_H
+#define _SPLITCODEGEN_H
+
+#include "ipgotocodegen.h"
+
+/* Code generator for the split style: the state machine is divided into
+ * partitions and each partition is written as a separate function in its
+ * own file (see ALL_PARTITIONS). Builds on the in-place goto generator. */
+class SplitCodeGen : public IpGotoCodeGen
+{
+public:
+    /* ptOutLabelUsed and currentPartition are given defined values here so
+     * that they are never read uninitialised; PARTITION() and STATE_GOTOS()
+     * assign the working values before they are used. */
+    SplitCodeGen( ostream &out ) : FsmCodeGen(out), IpGotoCodeGen(out),
+            ptOutLabelUsed(false), currentPartition(0) {}
+
+    /* Set while writing a partition once a jump to _pt_out has been
+     * written, so that PARTITION() knows to write that label. */
+    bool ptOutLabelUsed;
+
+    std::ostream &PART_MAP();
+    std::ostream &EXIT_STATES( int partition );
+    std::ostream &PART_TRANS( int partition );
+    std::ostream &TRANS_GOTO( RedTransAp *trans, int level );
+    void GOTO_HEADER( RedStateAp *state, bool stateInPartition );
+    std::ostream &STATE_GOTOS( int partition );
+    std::ostream &PARTITION( int partition );
+    std::ostream &ALL_PARTITIONS();
+    void writeData();
+    void writeExec();
+    /* NOTE(review): no definition of writeParts() appears in
+     * splitcodegen.cpp -- confirm whether this declaration is still used. */
+    void writeParts();
+
+    void setLabelsNeeded( RedStateAp *fromState, InlineList *inlineList );
+    void setLabelsNeeded( RedStateAp *fromState, RedTransAp *trans );
+    void setLabelsNeeded();
+
+    /* Partition being written; set by STATE_GOTOS() and read by
+     * TRANS_GOTO() to tell local jumps from cross-partition jumps. */
+    int currentPartition;
+};
+
+/*
+ * struct CSplitCodeGen: the split generator combined with CCodeGen.
+ * FsmCodeGen is initialised directly here as well as through both bases.
+ */
+struct CSplitCodeGen
+ : public SplitCodeGen, public CCodeGen
+{
+ CSplitCodeGen( ostream &out ) :
+ FsmCodeGen(out), SplitCodeGen(out), CCodeGen(out) {}
+};
+
+/*
+ * struct DSplitCodeGen: the split generator combined with DCodeGen.
+ */
+struct DSplitCodeGen
+ : public SplitCodeGen, public DCodeGen
+{
+ DSplitCodeGen( ostream &out ) :
+ FsmCodeGen(out), SplitCodeGen(out), DCodeGen(out) {}
+};
+
+
+#endif /* _SPLITCODEGEN_H */
diff --git a/contrib/tools/ragel5/rlgen-cd/tabcodegen.cpp b/contrib/tools/ragel5/rlgen-cd/tabcodegen.cpp
new file mode 100644
index 0000000000..22f09534b2
--- /dev/null
+++ b/contrib/tools/ragel5/rlgen-cd/tabcodegen.cpp
@@ -0,0 +1,988 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Erich Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "rlgen-cd.h"
+#include "tabcodegen.h"
+#include "redfsm.h"
+#include "gendata.h"
+
+/* Decide whether the transition tables should be addressed through an index
+ * array. The table size is estimated for both layouts and useIndicies is set
+ * when the indexed layout is strictly smaller. */
+void TabCodeGen::calcIndexSize()
+{
+    int costIndexed = 0;
+    int costDirect = 0;
+
+    /* Indexed layout: one index entry per outgoing transition slot of every
+     * state (singles, ranges and the default transition if present)... */
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        int slots = st->outSingle.length() + st->outRange.length();
+        if ( st->defTrans != 0 )
+            slots += 1;
+        costIndexed += arrayTypeSize(redFsm->maxIndex) * slots;
+    }
+
+    /* ...plus one target, and one action when actions exist, per entry of
+     * the transition set. */
+    costIndexed += arrayTypeSize(redFsm->maxState) * redFsm->transSet.length();
+    if ( redFsm->anyActions() )
+        costIndexed += arrayTypeSize(redFsm->maxActionLoc) * redFsm->transSet.length();
+
+    /* Direct layout: a target, and an action when actions exist, stored in
+     * every slot. */
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        int slots = st->outSingle.length() + st->outRange.length();
+        if ( st->defTrans != 0 )
+            slots += 1;
+        costDirect += arrayTypeSize(redFsm->maxState) * slots;
+        if ( redFsm->anyActions() )
+            costDirect += arrayTypeSize(redFsm->maxActionLoc) * slots;
+    }
+
+    useIndicies = ( costIndexed < costDirect );
+}
+
+/* Write the table entry for a state's to-state action: the action's
+ * location plus one, or 0 when the state has none. Returns the stream. */
+std::ostream &TabCodeGen::TO_STATE_ACTION( RedStateAp *state )
+{
+    const int entry = ( state->toStateAction != 0 ) ?
+            state->toStateAction->location+1 : 0;
+    out << entry;
+    return out;
+}
+
+/* Write the table entry for a state's from-state action: the action's
+ * location plus one, or 0 when the state has none. Returns the stream. */
+std::ostream &TabCodeGen::FROM_STATE_ACTION( RedStateAp *state )
+{
+    const int entry = ( state->fromStateAction != 0 ) ?
+            state->fromStateAction->location+1 : 0;
+    out << entry;
+    return out;
+}
+
+/* Write the table entry for a state's eof action: the action's location
+ * plus one, or 0 when the state has none. Returns the stream. */
+std::ostream &TabCodeGen::EOF_ACTION( RedStateAp *state )
+{
+    const int entry = ( state->eofAction != 0 ) ?
+            state->eofAction->location+1 : 0;
+    out << entry;
+    return out;
+}
+
+
+/* Write the table entry for a transition's action: the action's location
+ * plus one, or 0 when the transition has no action. Returns the stream. */
+std::ostream &TabCodeGen::TRANS_ACTION( RedTransAp *trans )
+{
+    const int entry = ( trans->action != 0 ) ?
+            trans->action->location+1 : 0;
+    out << entry;
+    return out;
+}
+
+/* Write one switch case per action that is referenced as a to-state action,
+ * followed by a line directive. Returns the stream. */
+std::ostream &TabCodeGen::TO_STATE_ACTION_SWITCH()
+{
+    for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
+        /* Unreferenced actions get no case. */
+        if ( !( act->numToStateRefs > 0 ) )
+            continue;
+
+        /* Case label, action body, break. */
+        out << "\tcase " << act->actionId << ":\n";
+        ACTION( out, act, 0, false );
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Write one switch case per action that is referenced as a from-state
+ * action, followed by a line directive. Returns the stream. */
+std::ostream &TabCodeGen::FROM_STATE_ACTION_SWITCH()
+{
+    for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
+        /* Unreferenced actions get no case. */
+        if ( !( act->numFromStateRefs > 0 ) )
+            continue;
+
+        /* Case label, action body, break. */
+        out << "\tcase " << act->actionId << ":\n";
+        ACTION( out, act, 0, false );
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Write one switch case per action that is referenced as an eof action,
+ * followed by a line directive. The action body is written with the last
+ * ACTION() argument set to true, unlike the other action switches.
+ * Returns the stream. */
+std::ostream &TabCodeGen::EOF_ACTION_SWITCH()
+{
+    for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
+        /* Unreferenced actions get no case. */
+        if ( !( act->numEofRefs > 0 ) )
+            continue;
+
+        /* Case label, action body, break. */
+        out << "\tcase " << act->actionId << ":\n";
+        ACTION( out, act, 0, true );
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+
+/* Write one switch case per action that is referenced from a transition,
+ * followed by a line directive. Returns the stream. */
+std::ostream &TabCodeGen::ACTION_SWITCH()
+{
+    for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
+        /* Unreferenced actions get no case. */
+        if ( !( act->numTransRefs > 0 ) )
+            continue;
+
+        /* Case label, action body, break. */
+        out << "\tcase " << act->actionId << ":\n";
+        ACTION( out, act, 0, false );
+        out << "\tbreak;\n";
+    }
+
+    genLineDirective( out );
+    return out;
+}
+
+/* Write, for every state in list order, the running total of the
+ * stateCondList lengths of the states before it. Values are comma
+ * separated, with a line break after every IALL separators.
+ * Returns the stream. */
+std::ostream &TabCodeGen::COND_OFFSETS()
+{
+    int separators = 0;
+    int nextOffset = 0;
+
+    out << "\t";
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        out << nextOffset;
+        /* Move the offset past this state's entries. */
+        nextOffset += st->stateCondList.length();
+
+        /* No separator after the final value. */
+        if ( st.last() )
+            continue;
+
+        out << ", ";
+        separators += 1;
+        if ( separators % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+    return out;
+}
+
+/* Write, for every state in list order, the running total of key slots used
+ * by the states before it: one slot per single transition and two per range
+ * transition. Values are comma separated, with a line break after every
+ * IALL separators. Returns the stream. */
+std::ostream &TabCodeGen::KEY_OFFSETS()
+{
+    int separators = 0;
+    int nextOffset = 0;
+
+    out << "\t";
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        out << nextOffset;
+        /* Move the offset past this state's keys. */
+        nextOffset += st->outSingle.length() + st->outRange.length()*2;
+
+        /* No separator after the final value. */
+        if ( st.last() )
+            continue;
+
+        out << ", ";
+        separators += 1;
+        if ( separators % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+    return out;
+}
+
+
+/* Write, for every state in list order, the running total of index slots
+ * used by the states before it: one per single transition, one per range
+ * transition and one for the default transition when present. Values are
+ * comma separated, with a line break after every IALL separators.
+ * Returns the stream. */
+std::ostream &TabCodeGen::INDEX_OFFSETS()
+{
+    int separators = 0;
+    int nextOffset = 0;
+
+    out << "\t";
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        out << nextOffset;
+        /* Move the offset past this state's slots. */
+        nextOffset += st->outSingle.length() + st->outRange.length();
+        if ( st->defTrans != 0 )
+            nextOffset += 1;
+
+        /* No separator after the final value. */
+        if ( st.last() )
+            continue;
+
+        out << ", ";
+        separators += 1;
+        if ( separators % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+    return out;
+}
+
+/* Write the length of every state's stateCondList, in state list order.
+ * Values are comma separated, with a line break after every IALL
+ * separators. Returns the stream. */
+std::ostream &TabCodeGen::COND_LENS()
+{
+    int separators = 0;
+
+    out << "\t";
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        out << st->stateCondList.length();
+
+        /* No separator after the final value. */
+        if ( st.last() )
+            continue;
+
+        out << ", ";
+        separators += 1;
+        if ( separators % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+    return out;
+}
+
+
+/* Write the number of single-key transitions of every state, in state list
+ * order. Values are comma separated, with a line break after every IALL
+ * separators. Returns the stream. */
+std::ostream &TabCodeGen::SINGLE_LENS()
+{
+    int separators = 0;
+
+    out << "\t";
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        out << st->outSingle.length();
+
+        /* No separator after the final value. */
+        if ( st.last() )
+            continue;
+
+        out << ", ";
+        separators += 1;
+        if ( separators % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+    return out;
+}
+
+/* Write the number of range transitions of every state, in state list
+ * order. Values are comma separated, with a line break after every IALL
+ * separators. Returns the stream. */
+std::ostream &TabCodeGen::RANGE_LENS()
+{
+    int separators = 0;
+
+    out << "\t";
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        out << st->outRange.length();
+
+        /* No separator after the final value. */
+        if ( st.last() )
+            continue;
+
+        out << ", ";
+        separators += 1;
+        if ( separators % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+    return out;
+}
+
+/* Write the to-state action entry of every state, in state list order.
+ * Values are comma separated, with a line break after every IALL
+ * separators. Returns the stream. */
+std::ostream &TabCodeGen::TO_STATE_ACTIONS()
+{
+    int separators = 0;
+
+    out << "\t";
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* The state's to-state action entry, or 0 when it has none. */
+        TO_STATE_ACTION(st);
+
+        /* No separator after the final value. */
+        if ( st.last() )
+            continue;
+
+        out << ", ";
+        separators += 1;
+        if ( separators % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+    return out;
+}
+
+/* Write the from-state action entry of every state, in state list order.
+ * Values are comma separated, with a line break after every IALL
+ * separators. Returns the stream. */
+std::ostream &TabCodeGen::FROM_STATE_ACTIONS()
+{
+    int separators = 0;
+
+    out << "\t";
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* The state's from-state action entry, or 0 when it has none. */
+        FROM_STATE_ACTION(st);
+
+        /* No separator after the final value. */
+        if ( st.last() )
+            continue;
+
+        out << ", ";
+        separators += 1;
+        if ( separators % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+    return out;
+}
+
+/* Write the eof action entry of every state, in state list order. Values
+ * are comma separated, with a line break after every IALL separators.
+ * Returns the stream. */
+std::ostream &TabCodeGen::EOF_ACTIONS()
+{
+    int separators = 0;
+
+    out << "\t";
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* The state's eof action entry, or 0 when it has none. */
+        EOF_ACTION(st);
+
+        /* No separator after the final value. */
+        if ( st.last() )
+            continue;
+
+        out << ", ";
+        separators += 1;
+        if ( separators % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+    return out;
+}
+
+/* Write the low and high key of every condition entry of every state, in
+ * state list order. Each value is followed by a comma, with a line break
+ * after every IALL values, and the list is closed by a trailing 0.
+ * Returns the stream. */
+std::ostream &TabCodeGen::COND_KEYS()
+{
+    int written = 0;
+
+    out << '\t';
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) {
+            /* Low end of the key range. */
+            out << KEY( sc->lowKey ) << ", ";
+            written += 1;
+            if ( written % IALL == 0 )
+                out << "\n\t";
+
+            /* High end of the key range. */
+            out << KEY( sc->highKey ) << ", ";
+            written += 1;
+            if ( written % IALL == 0 )
+                out << "\n\t";
+        }
+    }
+
+    /* A final dummy value saves having to detect the last real entry in
+     * order to leave off its comma. */
+    out << 0 << "\n";
+    return out;
+}
+
+/* Write the condition space id of every condition entry of every state, in
+ * state list order. Each value is followed by a comma, with a line break
+ * after every IALL values, and the list is closed by a trailing 0.
+ * Returns the stream. */
+std::ostream &TabCodeGen::COND_SPACES()
+{
+    int written = 0;
+
+    out << '\t';
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) {
+            out << sc->condSpace->condSpaceId << ", ";
+            written += 1;
+            if ( written % IALL == 0 )
+                out << "\n\t";
+        }
+    }
+
+    /* A final dummy value saves having to detect the last real entry in
+     * order to leave off its comma. */
+    out << 0 << "\n";
+    return out;
+}
+
+/* Write the transition keys of every state, in state list order: first the
+ * key of each single transition, then the low and high key of each range
+ * transition. Each value is followed by a comma, with a line break after
+ * every IALL values, and the list is closed by a trailing 0.
+ * Returns the stream. */
+std::ostream &TabCodeGen::KEYS()
+{
+    int written = 0;
+
+    out << '\t';
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* Single keys. */
+        for ( RedTransList::Iter single = st->outSingle; single.lte(); single++ ) {
+            out << KEY( single->lowKey ) << ", ";
+            written += 1;
+            if ( written % IALL == 0 )
+                out << "\n\t";
+        }
+
+        /* Range keys, low end then high end. */
+        for ( RedTransList::Iter range = st->outRange; range.lte(); range++ ) {
+            out << KEY( range->lowKey ) << ", ";
+            written += 1;
+            if ( written % IALL == 0 )
+                out << "\n\t";
+
+            out << KEY( range->highKey ) << ", ";
+            written += 1;
+            if ( written % IALL == 0 )
+                out << "\n\t";
+        }
+    }
+
+    /* A final dummy value saves having to detect the last real entry in
+     * order to leave off its comma. */
+    out << 0 << "\n";
+    return out;
+}
+
+/* Write the transition id for every transition slot of every state, in
+ * state list order: singles, then ranges, then the default transition when
+ * present. Each value is followed by a comma, with a line break after every
+ * IALL values, and the list is closed by a trailing 0. Returns the stream. */
+std::ostream &TabCodeGen::INDICIES()
+{
+    int written = 0;
+
+    out << '\t';
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* Single transitions. */
+        for ( RedTransList::Iter single = st->outSingle; single.lte(); single++ ) {
+            out << single->value->id << ", ";
+            written += 1;
+            if ( written % IALL == 0 )
+                out << "\n\t";
+        }
+
+        /* Range transitions. */
+        for ( RedTransList::Iter range = st->outRange; range.lte(); range++ ) {
+            out << range->value->id << ", ";
+            written += 1;
+            if ( written % IALL == 0 )
+                out << "\n\t";
+        }
+
+        /* Default transition. */
+        if ( st->defTrans != 0 ) {
+            out << st->defTrans->id << ", ";
+            written += 1;
+            if ( written % IALL == 0 )
+                out << "\n\t";
+        }
+    }
+
+    /* A final dummy value saves having to detect the last real entry in
+     * order to leave off its comma. */
+    out << 0 << "\n";
+    return out;
+}
+
+/* Write the target state id for every transition slot of every state, in
+ * state list order: singles, then ranges, then the default transition when
+ * present. Each value is followed by a comma, with a line break after every
+ * IALL values, and the list is closed by a trailing 0. Returns the stream. */
+std::ostream &TabCodeGen::TRANS_TARGS()
+{
+    int written = 0;
+
+    out << '\t';
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* Single transitions. */
+        for ( RedTransList::Iter single = st->outSingle; single.lte(); single++ ) {
+            out << single->value->targ->id << ", ";
+            written += 1;
+            if ( written % IALL == 0 )
+                out << "\n\t";
+        }
+
+        /* Range transitions. */
+        for ( RedTransList::Iter range = st->outRange; range.lte(); range++ ) {
+            out << range->value->targ->id << ", ";
+            written += 1;
+            if ( written % IALL == 0 )
+                out << "\n\t";
+        }
+
+        /* Default transition. */
+        if ( st->defTrans != 0 ) {
+            out << st->defTrans->targ->id << ", ";
+            written += 1;
+            if ( written % IALL == 0 )
+                out << "\n\t";
+        }
+    }
+
+    /* A final dummy value saves having to detect the last real entry in
+     * order to leave off its comma. */
+    out << 0 << "\n";
+    return out;
+}
+
+
+/* Write the action entry for every transition slot of every state, in state
+ * list order: singles, then ranges, then the default transition when
+ * present. Each value is followed by a comma, with a line break after every
+ * IALL values, and the list is closed by a trailing 0. Returns the stream. */
+std::ostream &TabCodeGen::TRANS_ACTIONS()
+{
+    int written = 0;
+
+    out << '\t';
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* Single transitions. */
+        for ( RedTransList::Iter single = st->outSingle; single.lte(); single++ ) {
+            TRANS_ACTION( single->value ) << ", ";
+            written += 1;
+            if ( written % IALL == 0 )
+                out << "\n\t";
+        }
+
+        /* Range transitions. */
+        for ( RedTransList::Iter range = st->outRange; range.lte(); range++ ) {
+            TRANS_ACTION( range->value ) << ", ";
+            written += 1;
+            if ( written % IALL == 0 )
+                out << "\n\t";
+        }
+
+        /* Default transition. */
+        if ( st->defTrans != 0 ) {
+            TRANS_ACTION( st->defTrans ) << ", ";
+            written += 1;
+            if ( written % IALL == 0 )
+                out << "\n\t";
+        }
+    }
+
+    /* A final dummy value saves having to detect the last real entry in
+     * order to leave off its comma. */
+    out << 0 << "\n";
+    return out;
+}
+
+/* Write the target state id of every transition in the transition set,
+ * ordered by transition id. Values are comma separated, with a line break
+ * after every IALL separators. Returns the stream. */
+std::ostream &TabCodeGen::TRANS_TARGS_WI()
+{
+    const int numTrans = redFsm->transSet.length();
+
+    /* The set is not walked in id order, so gather the transitions into a
+     * table indexed by id first. */
+    RedTransAp **byId = new RedTransAp*[numTrans];
+    for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ )
+        byId[trans->id] = trans;
+
+    out << '\t';
+    for ( int id = 0; id < numTrans; id++ ) {
+        out << byId[id]->targ->id;
+
+        /* No separator after the final value. */
+        if ( id == numTrans - 1 )
+            continue;
+
+        out << ", ";
+        /* id + 1 separators have been written at this point. */
+        if ( ( id + 1 ) % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+
+    delete[] byId;
+    return out;
+}
+
+
+/* Write the action entry of every transition in the transition set, ordered
+ * by transition id. Values are comma separated, with a line break after
+ * every IALL separators. Returns the stream. */
+std::ostream &TabCodeGen::TRANS_ACTIONS_WI()
+{
+    const int numTrans = redFsm->transSet.length();
+
+    /* The set is not walked in id order, so gather the transitions into a
+     * table indexed by id first. */
+    RedTransAp **byId = new RedTransAp*[numTrans];
+    for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ )
+        byId[trans->id] = trans;
+
+    out << '\t';
+    for ( int id = 0; id < numTrans; id++ ) {
+        TRANS_ACTION( byId[id] );
+
+        /* No separator after the final value. */
+        if ( id == numTrans - 1 )
+            continue;
+
+        out << ", ";
+        /* id + 1 separators have been written at this point. */
+        if ( ( id + 1 ) % IALL == 0 )
+            out << "\n\t";
+    }
+    out << "\n";
+
+    delete[] byId;
+    return out;
+}
+
+/* Write the generated code that finds the transition to take for the current
+ * character. The emitted code points _keys and _trans at the current
+ * state's entries, binary searches the single keys and then the range keys
+ * (stored as low/high pairs, hence the steps of two), and jumps to _match
+ * as soon as a key matches. When nothing matches, _trans has been advanced
+ * past the single and range entries of the state. */
+void TabCodeGen::LOCATE_TRANS()
+{
+ out <<
+ " _keys = " << ARR_OFF( K(), KO() + "[" + CS() + "]" ) << ";\n"
+ " _trans = " << IO() << "[" << CS() << "];\n"
+ "\n"
+ " _klen = " << SL() << "[" << CS() << "];\n"
+ " if ( _klen > 0 ) {\n"
+ " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_lower = _keys;\n"
+ " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_mid;\n"
+ " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_upper = _keys + _klen - 1;\n"
+ " while (1) {\n"
+ " if ( _upper < _lower )\n"
+ " break;\n"
+ "\n"
+ " _mid = _lower + ((_upper-_lower) >> 1);\n"
+ " if ( " << GET_WIDE_KEY() << " < *_mid )\n"
+ " _upper = _mid - 1;\n"
+ " else if ( " << GET_WIDE_KEY() << " > *_mid )\n"
+ " _lower = _mid + 1;\n"
+ " else {\n"
+ " _trans += (_mid - _keys);\n"
+ " goto _match;\n"
+ " }\n"
+ " }\n"
+ " _keys += _klen;\n"
+ " _trans += _klen;\n"
+ " }\n"
+ "\n"
+ " _klen = " << RL() << "[" << CS() << "];\n"
+ " if ( _klen > 0 ) {\n"
+ " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_lower = _keys;\n"
+ " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_mid;\n"
+ " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_upper = _keys + (_klen<<1) - 2;\n"
+ " while (1) {\n"
+ " if ( _upper < _lower )\n"
+ " break;\n"
+ "\n"
+ " _mid = _lower + (((_upper-_lower) >> 1) & ~1);\n"
+ " if ( " << GET_WIDE_KEY() << " < _mid[0] )\n"
+ " _upper = _mid - 2;\n"
+ " else if ( " << GET_WIDE_KEY() << " > _mid[1] )\n"
+ " _lower = _mid + 2;\n"
+ " else {\n"
+ " _trans += ((_mid - _keys)>>1);\n"
+ " goto _match;\n"
+ " }\n"
+ " }\n"
+ " _trans += _klen;\n"
+ " }\n"
+ "\n";
+}
+
+/* Emit a jump to a fixed state: assign gotoDest to the CS() expression and
+ * branch to the _again label. inFinish is not used by this generator. */
+void TabCodeGen::GOTO( ostream &ret, int gotoDest, bool inFinish )
+{
+    /* State assignment. */
+    ret << "{";
+    ret << CS() << " = " << gotoDest << "; ";
+
+    /* Branch back into the main loop. */
+    ret << CTRL_FLOW();
+    ret << "goto _again;}";
+}
+
+/* Emit a jump to a computed state: the children of ilItem are written out as
+ * the expression assigned to CS(), followed by a branch to _again. */
+void TabCodeGen::GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish )
+{
+    /* Left-hand side of the assignment. */
+    ret << "{";
+    ret << CS() << " = (";
+
+    /* The target expression supplied by the inline item. */
+    INLINE_LIST( ret, ilItem->children, 0, inFinish );
+
+    /* Close the expression and branch. */
+    ret << "); ";
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Emit the expression for the state held before the current transition; the
+ * table generator keeps it in the _ps local. inFinish is not used. */
+void TabCodeGen::CURS( ostream &ret, bool inFinish )
+{
+    const char *prevStateExpr = "(_ps)";
+    ret << prevStateExpr;
+}
+
+/* Emit the expression for the target state, which here is the parenthesised
+ * CS() expression. inFinish and targState are not used. */
+void TabCodeGen::TARGS( ostream &ret, bool inFinish, int targState )
+{
+    ret << "(";
+    ret << CS();
+    ret << ")";
+}
+
+/* Emit an assignment of a fixed state to CS() without any jump. inFinish is
+ * not used. */
+void TabCodeGen::NEXT( ostream &ret, int nextDest, bool inFinish )
+{
+    ret << CS();
+    ret << " = " << nextDest;
+    ret << ";";
+}
+
+/* Emit an assignment of a computed state to CS() without any jump; the
+ * children of ilItem provide the expression. */
+void TabCodeGen::NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish )
+{
+    /* Left-hand side and opening parenthesis. */
+    ret << CS();
+    ret << " = (";
+
+    /* The expression itself, then the closing parenthesis. */
+    INLINE_LIST( ret, ilItem->children, 0, inFinish );
+    ret << ");";
+}
+
+/* Emit a call to a fixed state: save CS() in the STACK() slot at TOP() with
+ * a post-increment, assign callDest to CS(), then branch to _again.
+ * targState and inFinish are not used. */
+void TabCodeGen::CALL( ostream &ret, int callDest, int targState, bool inFinish )
+{
+    /* Save the current state. */
+    ret << "{";
+    ret << STACK() << "[" << TOP() << "++] = " << CS() << "; ";
+
+    /* Enter the called state. */
+    ret << CS() << " = " << callDest << "; ";
+
+    /* Branch back into the main loop. */
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Emit a call to a computed state: save CS() in the STACK() slot at TOP()
+ * with a post-increment, assign the expression built from ilItem's children
+ * to CS(), then branch to _again. */
+void TabCodeGen::CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish )
+{
+    /* Save the current state. */
+    ret << "{";
+    ret << STACK() << "[" << TOP() << "++] = " << CS() << "; ";
+
+    /* Assign the computed destination. */
+    ret << CS() << " = (";
+    INLINE_LIST( ret, ilItem->children, targState, inFinish );
+    ret << "); ";
+
+    /* Branch back into the main loop. */
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Emit a return: restore CS() from the STACK() slot reached by
+ * pre-decrementing TOP(), then branch to _again. inFinish is not used. */
+void TabCodeGen::RET( ostream &ret, bool inFinish )
+{
+    /* Restore the saved state. */
+    ret << "{";
+    ret << CS() << " = " << STACK() << "[--" << TOP() << "]; ";
+
+    /* Branch back into the main loop. */
+    ret << CTRL_FLOW() << "goto _again;}";
+}
+
+/* Emit a jump to the _out label and record that the label is needed, so
+ * that writeExec() writes it. targState is not used. */
+void TabCodeGen::BREAK( ostream &ret, int targState )
+{
+    outLabelUsed = true;
+
+    ret << CTRL_FLOW();
+    ret << "goto _out;";
+}
+
+/* Write out the static data tables of the table-driven machine. The action,
+ * condition, index and per-state action tables are emitted only when the
+ * machine uses them; the key, length, offset and target tables are always
+ * written. The state id definitions come last. */
+void TabCodeGen::writeData()
+{
+    /* Action array: skipped when there are no transition functions, since
+     * an empty array would never be used. */
+    if ( redFsm->anyActions() ) {
+        OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActArrItem), A() );
+        ACTIONS_ARRAY();
+        CLOSE_ARRAY() << "\n";
+    }
+
+    /* Condition tables: offsets, lengths, keys and space ids. */
+    if ( redFsm->anyConditions() ) {
+        OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCondOffset), CO() );
+        COND_OFFSETS();
+        CLOSE_ARRAY() << "\n";
+
+        OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCondLen), CL() );
+        COND_LENS();
+        CLOSE_ARRAY() << "\n";
+
+        OPEN_ARRAY( WIDE_ALPH_TYPE(), CK() );
+        COND_KEYS();
+        CLOSE_ARRAY() << "\n";
+
+        OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCondSpaceId), C() );
+        COND_SPACES();
+        CLOSE_ARRAY() << "\n";
+    }
+
+    /* Key offsets and the keys themselves. */
+    OPEN_ARRAY( ARRAY_TYPE(redFsm->maxKeyOffset), KO() );
+    KEY_OFFSETS();
+    CLOSE_ARRAY() << "\n";
+
+    OPEN_ARRAY( WIDE_ALPH_TYPE(), K() );
+    KEYS();
+    CLOSE_ARRAY() << "\n";
+
+    /* Per-state counts of single keys and of key ranges. */
+    OPEN_ARRAY( ARRAY_TYPE(redFsm->maxSingleLen), SL() );
+    SINGLE_LENS();
+    CLOSE_ARRAY() << "\n";
+
+    OPEN_ARRAY( ARRAY_TYPE(redFsm->maxRangeLen), RL() );
+    RANGE_LENS();
+    CLOSE_ARRAY() << "\n";
+
+    /* Per-state offsets of the first transition. */
+    OPEN_ARRAY( ARRAY_TYPE(redFsm->maxIndexOffset), IO() );
+    INDEX_OFFSETS();
+    CLOSE_ARRAY() << "\n";
+
+    /* The index table exists only when indicies are in use. */
+    if ( useIndicies ) {
+        OPEN_ARRAY( ARRAY_TYPE(redFsm->maxIndex), I() );
+        INDICIES();
+        CLOSE_ARRAY() << "\n";
+    }
+
+    /* Transition targets; the _WI writer goes with the index table. */
+    OPEN_ARRAY( ARRAY_TYPE(redFsm->maxState), TT() );
+    if ( useIndicies )
+        TRANS_TARGS_WI();
+    else
+        TRANS_TARGS();
+    CLOSE_ARRAY() << "\n";
+
+    /* Transition actions, written with the matching writer. */
+    if ( redFsm->anyActions() ) {
+        OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), TA() );
+        if ( useIndicies )
+            TRANS_ACTIONS_WI();
+        else
+            TRANS_ACTIONS();
+        CLOSE_ARRAY() << "\n";
+    }
+
+    /* Per-state action tables: to-state, from-state and EOF. */
+    if ( redFsm->anyToStateActions() ) {
+        OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), TSA() );
+        TO_STATE_ACTIONS();
+        CLOSE_ARRAY() << "\n";
+    }
+
+    if ( redFsm->anyFromStateActions() ) {
+        OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), FSA() );
+        FROM_STATE_ACTIONS();
+        CLOSE_ARRAY() << "\n";
+    }
+
+    if ( redFsm->anyEofActions() ) {
+        OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), EA() );
+        EOF_ACTIONS();
+        CLOSE_ARRAY() << "\n";
+    }
+
+    STATE_IDS();
+}
+
+/* Emit the code that translates the current character into the widened key
+ * _widec for states that have conditions. The emitted code starts _widec at
+ * GET_KEY(), binary searches the state's condition key ranges (low/high
+ * pairs) and, on a hit, switches on the condition space recorded for that
+ * range. Each case rebases the character onto the space's base key and adds
+ * one offset per condition that evaluates true. */
+void TabCodeGen::COND_TRANSLATE()
+{
+    /* Range search up to the switch on the condition space id. */
+    out <<
+        " _widec = " << GET_KEY() << ";\n"
+        " _klen = " << CL() << "[" << CS() << "];\n"
+        " _keys = " << ARR_OFF( CK(), "(" + CO() + "[" + CS() + "]*2)" ) << ";\n"
+        " if ( _klen > 0 ) {\n"
+        " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_lower = _keys;\n"
+        " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_mid;\n"
+        " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_upper = _keys + (_klen<<1) - 2;\n"
+        " while (1) {\n"
+        " if ( _upper < _lower )\n"
+        " break;\n"
+        "\n"
+        " _mid = _lower + (((_upper-_lower) >> 1) & ~1);\n"
+        " if ( " << GET_WIDE_KEY() << " < _mid[0] )\n"
+        " _upper = _mid - 2;\n"
+        " else if ( " << GET_WIDE_KEY() << " > _mid[1] )\n"
+        " _lower = _mid + 2;\n"
+        " else {\n"
+        " switch ( " << C() << "[" << CO() << "[" << CS() << "]"
+        " + ((_mid - _keys)>>1)] ) {\n";
+
+    /* One case per condition space. */
+    for ( CondSpaceList::Iter csi = condSpaceList; csi.lte(); csi++ ) {
+        CondSpace *condSpace = csi;
+        out << " case " << condSpace->condSpaceId << ": {\n";
+        out << TABS(2) << "_widec = " << CAST(WIDE_ALPH_TYPE()) << "(" <<
+            KEY(condSpace->baseKey) << " + (" << GET_KEY() <<
+            " - " << KEY(keyOps->minKey) << "));\n";
+
+        /* The iterator over the conditions has its own name so that it
+         * does not hide the condition space iterator of the outer loop.
+         * The condition at position n contributes 2^n alphabet sizes. */
+        for ( CondSet::Iter cond = condSpace->condSet; cond.lte(); cond++ ) {
+            out << TABS(2) << "if ( ";
+            CONDITION( out, *cond );
+            Size condValOffset = ((1 << cond.pos()) * keyOps->alphSize());
+            out << " ) _widec += " << condValOffset << ";\n";
+        }
+
+        out <<
+            " break;\n"
+            " }\n";
+    }
+
+    SWITCH_DEFAULT();
+
+    /* Close the switch, the match branch, the loop and the length check. */
+    out <<
+        " }\n"
+        " break;\n"
+        " }\n"
+        " }\n"
+        " }\n"
+        "\n";
+}
+
+/* Emit the execution loop of the table-driven machine: the local variable
+ * declarations, the _resume, _match and _again labels with the code between
+ * them, and the _out label when some emitted code jumps to it. */
+void TabCodeGen::writeExec()
+{
+ /* Set to true wherever a "goto _out" is emitted, so that the label is only
+ * written at the end when it is actually used. */
+ outLabelUsed = false;
+
+ out <<
+ " {\n"
+ " int _klen";
+
+ /* _ps is declared only when redFsm reports a reference to the current
+ * state in the regular actions. */
+ if ( redFsm->anyRegCurStateRef() )
+ out << ", _ps";
+
+ out <<
+ ";\n"
+ " " << UINT() << " _trans;\n";
+
+ if ( redFsm->anyConditions() )
+ out << " " << WIDE_ALPH_TYPE() << " _widec;\n";
+
+ /* The action cursor and counter are needed by any of the three action
+ * loops emitted below. */
+ if ( redFsm->anyToStateActions() || redFsm->anyRegActions()
+ || redFsm->anyFromStateActions() )
+ {
+ out <<
+ " " << PTR_CONST() << ARRAY_TYPE(redFsm->maxActArrItem) << POINTER() << "_acts;\n"
+ " " << UINT() << " _nacts;\n";
+ }
+
+ out <<
+ " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_keys;\n"
+ "\n";
+
+ /* With an end pointer, leave immediately when P() already equals PE(). */
+ if ( hasEnd ) {
+ outLabelUsed = true;
+ out <<
+ " if ( " << P() << " == " << PE() << " )\n"
+ " goto _out;\n";
+ }
+
+ out << "_resume:\n";
+
+ /* When the machine has an error state, being in it leaves the loop. */
+ if ( redFsm->errState != 0 ) {
+ outLabelUsed = true;
+ out <<
+ " if ( " << CS() << " == " << redFsm->errState->id << " )\n"
+ " goto _out;\n";
+ }
+
+ /* Run the from-state actions of the current state. */
+ if ( redFsm->anyFromStateActions() ) {
+ out <<
+ " _acts = " << ARR_OFF( A(), FSA() + "[" + CS() + "]" ) << ";\n"
+ " _nacts = " << CAST(UINT()) << " *_acts++;\n"
+ " while ( _nacts-- > 0 ) {\n"
+ " switch ( *_acts++ ) {\n";
+ FROM_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ "\n";
+ }
+
+ if ( redFsm->anyConditions() )
+ COND_TRANSLATE();
+
+ LOCATE_TRANS();
+
+ out << "_match:\n";
+
+ /* Remember the state being left before CS() is overwritten below. */
+ if ( redFsm->anyRegCurStateRef() )
+ out << " _ps = " << CS() << ";\n";
+
+ /* With indicies, _trans is first mapped through the I() table. */
+ if ( useIndicies )
+ out << " _trans = " << I() << "[_trans];\n";
+
+ out <<
+ " " << CS() << " = " << TT() << "[_trans];\n"
+ "\n";
+
+ /* Run the transition's actions; a zero entry in TA() skips to _again. */
+ if ( redFsm->anyRegActions() ) {
+ out <<
+ " if ( " << TA() << "[_trans] == 0 )\n"
+ " goto _again;\n"
+ "\n"
+ " _acts = " << ARR_OFF( A(), TA() + "[_trans]" ) << ";\n"
+ " _nacts = " << CAST(UINT()) << " *_acts++;\n"
+ " while ( _nacts-- > 0 )\n {\n"
+ " switch ( *_acts++ )\n {\n";
+ ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ "\n";
+ }
+
+ /* The _again label is written only under the conditions tested here. */
+ if ( redFsm->anyRegActions() || redFsm->anyActionGotos() ||
+ redFsm->anyActionCalls() || redFsm->anyActionRets() )
+ out << "_again:\n";
+
+ /* Run the to-state actions of the state just entered. */
+ if ( redFsm->anyToStateActions() ) {
+ out <<
+ " _acts = " << ARR_OFF( A(), TSA() + "[" + CS() + "]" ) << ";\n"
+ " _nacts = " << CAST(UINT()) << " *_acts++;\n"
+ " while ( _nacts-- > 0 ) {\n"
+ " switch ( *_acts++ ) {\n";
+ TO_STATE_ACTION_SWITCH();
+ SWITCH_DEFAULT() <<
+ " }\n"
+ " }\n"
+ "\n";
+ }
+
+ /* Advance P(); with an end pointer the loop stops once P() reaches PE(),
+ * otherwise it always resumes. */
+ if ( hasEnd ) {
+ out <<
+ " if ( ++" << P() << " != " << PE() << " )\n"
+ " goto _resume;\n";
+ }
+ else {
+ out <<
+ " " << P() << " += 1;\n"
+ " goto _resume;\n";
+ }
+
+ if ( outLabelUsed )
+ out << " _out: {}\n";
+
+ out << " }\n";
+}
+
+
+/* Emit the block that runs the EOF actions of the current state. Nothing is
+ * written when the machine has no EOF actions. */
+void TabCodeGen::writeEOF()
+{
+    if ( !redFsm->anyEofActions() )
+        return;
+
+    /* Open the block: point _acts at the state's EOF action list, read its
+     * length into _nacts and start the loop over the actions. */
+    out <<
+        " {\n"
+        " " << PTR_CONST() << ARRAY_TYPE(redFsm->maxActArrItem) << POINTER() << "_acts = " <<
+        ARR_OFF( A(), EA() + "[" + CS() + "]" ) << ";\n"
+        " " << UINT() << " _nacts = " << CAST(UINT()) << " *_acts++;\n"
+        " while ( _nacts-- > 0 ) {\n"
+        " switch ( *_acts++ ) {\n";
+
+    /* The cases of the switch. */
+    EOF_ACTION_SWITCH();
+
+    /* Default case, then close the switch, the loop and the block. */
+    SWITCH_DEFAULT() <<
+        " }\n"
+        " }\n"
+        " }\n"
+        "\n";
+}
diff --git a/contrib/tools/ragel5/rlgen-cd/tabcodegen.h b/contrib/tools/ragel5/rlgen-cd/tabcodegen.h
new file mode 100644
index 0000000000..745eb18d81
--- /dev/null
+++ b/contrib/tools/ragel5/rlgen-cd/tabcodegen.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Erich Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _TABCODEGEN_H
+#define _TABCODEGEN_H
+
+#include <iostream>
+#include "fsmcodegen.h"
+
+/* Forwards. */
+struct CodeGenData;
+struct NameInst;
+struct RedTransAp;
+struct RedStateAp;
+
+/*
+ * TabCodeGen
+ *
+ * Table-driven code generator. It derives virtually from FsmCodeGen, so the
+ * language-specific generators below can combine it with another class built
+ * on the same base.
+ */
+class TabCodeGen : virtual public FsmCodeGen
+{
+public:
+    TabCodeGen( std::ostream &out ) : FsmCodeGen(out) {}
+    virtual ~TabCodeGen() { }
+
+    /* Entry points: the static tables and the execution loop. */
+    virtual void writeData();
+    virtual void writeExec();
+
+protected:
+    /* Switch statements over actions. */
+    std::ostream &TO_STATE_ACTION_SWITCH();
+    std::ostream &FROM_STATE_ACTION_SWITCH();
+    std::ostream &EOF_ACTION_SWITCH();
+    std::ostream &ACTION_SWITCH();
+
+    /* Writers for the contents of the individual tables. */
+    std::ostream &COND_KEYS();
+    std::ostream &COND_SPACES();
+    std::ostream &KEYS();
+    std::ostream &INDICIES();
+    std::ostream &COND_OFFSETS();
+    std::ostream &KEY_OFFSETS();
+    std::ostream &INDEX_OFFSETS();
+    std::ostream &COND_LENS();
+    std::ostream &SINGLE_LENS();
+    std::ostream &RANGE_LENS();
+    std::ostream &TO_STATE_ACTIONS();
+    std::ostream &FROM_STATE_ACTIONS();
+    std::ostream &EOF_ACTIONS();
+    std::ostream &TRANS_TARGS();
+    std::ostream &TRANS_ACTIONS();
+    std::ostream &TRANS_TARGS_WI();
+    std::ostream &TRANS_ACTIONS_WI();
+
+    /* Pieces of the execution loop. */
+    void LOCATE_TRANS();
+
+    void COND_TRANSLATE();
+
+    /* Writers for the control-flow statements used inside actions. */
+    void GOTO( std::ostream &ret, int gotoDest, bool inFinish );
+    void CALL( std::ostream &ret, int callDest, int targState, bool inFinish );
+    void NEXT( std::ostream &ret, int nextDest, bool inFinish );
+    void GOTO_EXPR( std::ostream &ret, InlineItem *ilItem, bool inFinish );
+    void NEXT_EXPR( std::ostream &ret, InlineItem *ilItem, bool inFinish );
+    void CALL_EXPR( std::ostream &ret, InlineItem *ilItem, int targState, bool inFinish );
+    void CURS( std::ostream &ret, bool inFinish );
+    void TARGS( std::ostream &ret, bool inFinish, int targState );
+    void RET( std::ostream &ret, bool inFinish );
+    void BREAK( std::ostream &ret, int targState );
+
+    /* Virtual hooks that derived generators may override. */
+    virtual std::ostream &TO_STATE_ACTION( RedStateAp *state );
+    virtual std::ostream &FROM_STATE_ACTION( RedStateAp *state );
+    virtual std::ostream &EOF_ACTION( RedStateAp *state );
+    virtual std::ostream &TRANS_ACTION( RedTransAp *trans );
+    virtual void calcIndexSize();
+    virtual void writeEOF();
+};
+
+
+/*
+ * CTabCodeGen
+ *
+ * TabCodeGen combined with CCodeGen. TabCodeGen derives virtually from
+ * FsmCodeGen, so the constructor here initialises that base itself.
+ */
+struct CTabCodeGen
+    : public TabCodeGen, public CCodeGen
+{
+    CTabCodeGen( std::ostream &out )
+        : FsmCodeGen(out), TabCodeGen(out), CCodeGen(out)
+    {
+    }
+};
+
+/*
+ * DTabCodeGen
+ *
+ * TabCodeGen combined with DCodeGen. TabCodeGen derives virtually from
+ * FsmCodeGen, so the constructor here initialises that base itself.
+ */
+struct DTabCodeGen
+    : public TabCodeGen, public DCodeGen
+{
+    DTabCodeGen( std::ostream &out )
+        : FsmCodeGen(out), TabCodeGen(out), DCodeGen(out)
+    {
+    }
+};
+
+
+#endif /* _TABCODEGEN_H */
diff --git a/contrib/tools/ragel5/rlgen-cd/ya.make b/contrib/tools/ragel5/rlgen-cd/ya.make
new file mode 100644
index 0000000000..ef2a59f8c2
--- /dev/null
+++ b/contrib/tools/ragel5/rlgen-cd/ya.make
@@ -0,0 +1,25 @@
+# Build description for contrib/tools/ragel5/rlgen-cd.
+# NOTE(review): presumably PROGRAM() declares an executable named after this
+# directory - confirm against the ya.make documentation.
+PROGRAM()
+
+NO_UTIL()
+NO_COMPILER_WARNINGS()
+
+# Other ragel5 directories this module depends on.
+PEERDIR(
+ contrib/tools/ragel5/aapl
+ contrib/tools/ragel5/common
+ contrib/tools/ragel5/redfsm
+)
+
+# Sources compiled into this module, one code generator per file plus main.cpp.
+SRCS(
+ fflatcodegen.cpp
+ fgotocodegen.cpp
+ flatcodegen.cpp
+ fsmcodegen.cpp
+ ftabcodegen.cpp
+ gotocodegen.cpp
+ ipgotocodegen.cpp
+ main.cpp
+ splitcodegen.cpp
+ tabcodegen.cpp
+)
+
+END()
diff --git a/geobase/CMakeLists.txt b/geobase/CMakeLists.txt
new file mode 100644
index 0000000000..164af3f4bc
--- /dev/null
+++ b/geobase/CMakeLists.txt
@@ -0,0 +1,10 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Descend into the two subdirectories of geobase.
+add_subdirectory(library)
+add_subdirectory(user-settings)
diff --git a/geobase/library/CMakeLists.darwin-x86_64.txt b/geobase/library/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..7257bb9ee1
--- /dev/null
+++ b/geobase/library/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,22 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Process the subdirectories of geobase/library.
+add_subdirectory(abi)
+add_subdirectory(api)
+add_subdirectory(city_id_calc)
+add_subdirectory(db)
+add_subdirectory(dispute_regs)
+add_subdirectory(utils)
+
+# geobase-library is an INTERFACE target: it has no sources of its own and only
+# passes the link dependencies listed below on to whatever links against it.
+add_library(geobase-library INTERFACE)
+target_link_libraries(geobase-library INTERFACE
+ contrib-libs-cxxsupp
+ yutil
+ geobase-library-api
+ library-db-stub
+)
diff --git a/geobase/library/CMakeLists.linux-aarch64.txt b/geobase/library/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..8a01a727c4
--- /dev/null
+++ b/geobase/library/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,23 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Process the subdirectories of geobase/library.
+add_subdirectory(abi)
+add_subdirectory(api)
+add_subdirectory(city_id_calc)
+add_subdirectory(db)
+add_subdirectory(dispute_regs)
+add_subdirectory(utils)
+
+# geobase-library is an INTERFACE target: it has no sources of its own and only
+# passes the link dependencies listed below on to whatever links against it.
+add_library(geobase-library INTERFACE)
+target_link_libraries(geobase-library INTERFACE
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ geobase-library-api
+ library-db-stub
+)
diff --git a/geobase/library/CMakeLists.linux-x86_64.txt b/geobase/library/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..8a01a727c4
--- /dev/null
+++ b/geobase/library/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,23 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Process the subdirectories of geobase/library.
+add_subdirectory(abi)
+add_subdirectory(api)
+add_subdirectory(city_id_calc)
+add_subdirectory(db)
+add_subdirectory(dispute_regs)
+add_subdirectory(utils)
+
+# geobase-library is an INTERFACE target: it has no sources of its own and only
+# passes the link dependencies listed below on to whatever links against it.
+add_library(geobase-library INTERFACE)
+target_link_libraries(geobase-library INTERFACE
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ geobase-library-api
+ library-db-stub
+)
diff --git a/geobase/library/CMakeLists.txt b/geobase/library/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/geobase/library/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Include the fragment generated for the platform being targeted. At most one
+# branch matches; when none does (for example a Linux or Windows configuration
+# with HAVE_CUDA set), nothing is included here.
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/geobase/library/CMakeLists.windows-x86_64.txt b/geobase/library/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..7257bb9ee1
--- /dev/null
+++ b/geobase/library/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,22 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Process the subdirectories of geobase/library.
+add_subdirectory(abi)
+add_subdirectory(api)
+add_subdirectory(city_id_calc)
+add_subdirectory(db)
+add_subdirectory(dispute_regs)
+add_subdirectory(utils)
+
+# geobase-library is an INTERFACE target: it has no sources of its own and only
+# passes the link dependencies listed below on to whatever links against it.
+add_library(geobase-library INTERFACE)
+target_link_libraries(geobase-library INTERFACE
+ contrib-libs-cxxsupp
+ yutil
+ geobase-library-api
+ library-db-stub
+)
diff --git a/geobase/library/abi/CMakeLists.darwin-x86_64.txt b/geobase/library/abi/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..9c18e158b2
--- /dev/null
+++ b/geobase/library/abi/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,56 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# Library target; its sources are attached by target_sources() below.
+add_library(geobase-library-abi)
+# Every compiler except MSVC gets -Wno-everything; MSVC gets no extra option.
+target_compile_options(geobase-library-abi PRIVATE
+ $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>
+)
+target_include_directories(geobase-library-abi PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/include
+)
+# PUBLIC: these dependencies are also passed on to targets linking this one.
+target_link_libraries(geobase-library-abi PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-cctz
+ geobase-library-city_id_calc
+ geobase-library-dispute_regs
+ geobase-library-utils
+ geobase-user-settings
+ library-cpp-geohash
+ library-cpp-json
+ cpp-reverse_geocoder-core
+)
+# The sources live in geobase/library, one level above this abi directory.
+target_sources(geobase-library-abi PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/library/asset.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/as_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/binary_reader.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/binary_format_internals.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/borders_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/ipreg_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/ipreg_isp_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/ipreg_traits_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/isp_names_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/fake_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/lookup_impl.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/lookup_impl_impl.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/region_type.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/regions_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/region_types_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/region_fields_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/regions_locale_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/resource_base.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/structs_impl.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/timezone_getter_impl.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/tor_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/v4.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/v6.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/vp_point_distance.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/vp_tree.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/vp_tree_search.cpp
+)
diff --git a/geobase/library/abi/CMakeLists.linux-aarch64.txt b/geobase/library/abi/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..e5370f291c
--- /dev/null
+++ b/geobase/library/abi/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,57 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# Library target; its sources are attached by target_sources() below.
+add_library(geobase-library-abi)
+# Every compiler except MSVC gets -Wno-everything; MSVC gets no extra option.
+target_compile_options(geobase-library-abi PRIVATE
+ $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>
+)
+target_include_directories(geobase-library-abi PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/include
+)
+# PUBLIC: these dependencies are also passed on to targets linking this one.
+target_link_libraries(geobase-library-abi PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-cctz
+ geobase-library-city_id_calc
+ geobase-library-dispute_regs
+ geobase-library-utils
+ geobase-user-settings
+ library-cpp-geohash
+ library-cpp-json
+ cpp-reverse_geocoder-core
+)
+# The sources live in geobase/library, one level above this abi directory.
+target_sources(geobase-library-abi PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/library/asset.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/as_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/binary_reader.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/binary_format_internals.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/borders_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/ipreg_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/ipreg_isp_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/ipreg_traits_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/isp_names_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/fake_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/lookup_impl.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/lookup_impl_impl.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/region_type.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/regions_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/region_types_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/region_fields_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/regions_locale_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/resource_base.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/structs_impl.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/timezone_getter_impl.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/tor_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/v4.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/v6.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/vp_point_distance.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/vp_tree.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/vp_tree_search.cpp
+)
diff --git a/geobase/library/abi/CMakeLists.linux-x86_64.txt b/geobase/library/abi/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..e5370f291c
--- /dev/null
+++ b/geobase/library/abi/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,57 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# Library target; its sources are attached by target_sources() below.
+add_library(geobase-library-abi)
+# Every compiler except MSVC gets -Wno-everything; MSVC gets no extra option.
+target_compile_options(geobase-library-abi PRIVATE
+ $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>
+)
+target_include_directories(geobase-library-abi PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/include
+)
+# PUBLIC: these dependencies are also passed on to targets linking this one.
+target_link_libraries(geobase-library-abi PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-cctz
+ geobase-library-city_id_calc
+ geobase-library-dispute_regs
+ geobase-library-utils
+ geobase-user-settings
+ library-cpp-geohash
+ library-cpp-json
+ cpp-reverse_geocoder-core
+)
+# The sources live in geobase/library, one level above this abi directory.
+target_sources(geobase-library-abi PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/library/asset.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/as_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/binary_reader.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/binary_format_internals.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/borders_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/ipreg_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/ipreg_isp_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/ipreg_traits_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/isp_names_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/fake_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/lookup_impl.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/lookup_impl_impl.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/region_type.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/regions_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/region_types_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/region_fields_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/regions_locale_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/resource_base.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/structs_impl.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/timezone_getter_impl.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/tor_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/v4.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/v6.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/vp_point_distance.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/vp_tree.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/vp_tree_search.cpp
+)
diff --git a/geobase/library/abi/CMakeLists.txt b/geobase/library/abi/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/geobase/library/abi/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Include the fragment generated for the platform being targeted. At most one
+# branch matches; when none does (for example a Linux or Windows configuration
+# with HAVE_CUDA set), nothing is included here.
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/geobase/library/abi/CMakeLists.windows-x86_64.txt b/geobase/library/abi/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..9c18e158b2
--- /dev/null
+++ b/geobase/library/abi/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,56 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# Library target; its sources are attached by target_sources() below.
+add_library(geobase-library-abi)
+# Every compiler except MSVC gets -Wno-everything; MSVC gets no extra option.
+target_compile_options(geobase-library-abi PRIVATE
+ $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>
+)
+target_include_directories(geobase-library-abi PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/include
+)
+# PUBLIC: these dependencies are also passed on to targets linking this one.
+target_link_libraries(geobase-library-abi PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-cctz
+ geobase-library-city_id_calc
+ geobase-library-dispute_regs
+ geobase-library-utils
+ geobase-user-settings
+ library-cpp-geohash
+ library-cpp-json
+ cpp-reverse_geocoder-core
+)
+# The sources live in geobase/library, one level above this abi directory.
+target_sources(geobase-library-abi PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/library/asset.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/as_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/binary_reader.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/binary_format_internals.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/borders_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/ipreg_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/ipreg_isp_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/ipreg_traits_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/isp_names_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/fake_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/lookup_impl.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/lookup_impl_impl.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/region_type.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/regions_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/region_types_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/region_fields_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/regions_locale_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/resource_base.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/structs_impl.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/timezone_getter_impl.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/tor_resource.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/v4.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/v6.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/vp_point_distance.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/vp_tree.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/vp_tree_search.cpp
+)
diff --git a/geobase/library/api/CMakeLists.darwin-x86_64.txt b/geobase/library/api/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..e7d1812621
--- /dev/null
+++ b/geobase/library/api/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,25 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# Library target; its sources are attached by target_sources() below.
+add_library(geobase-library-api)
+# Every compiler except MSVC gets -Wno-everything; MSVC gets no extra option.
+target_compile_options(geobase-library-api PRIVATE
+ $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>
+)
+# PUBLIC: these dependencies are also passed on to targets linking this one.
+target_link_libraries(geobase-library-api PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ geobase-library-abi
+)
+# The sources live in geobase/library, one level above this api directory.
+target_sources(geobase-library-api PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/library/lookup.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/lookup_wrapper.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/service_getter.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/timezone_getter.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/known_tz_list.cpp
+)
diff --git a/geobase/library/api/CMakeLists.linux-aarch64.txt b/geobase/library/api/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..40ecd7c759
--- /dev/null
+++ b/geobase/library/api/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,26 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(geobase-library-api)
+target_compile_options(geobase-library-api PRIVATE
+ $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>
+)
+target_link_libraries(geobase-library-api PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ geobase-library-abi
+)
+target_sources(geobase-library-api PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/library/lookup.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/lookup_wrapper.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/service_getter.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/timezone_getter.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/known_tz_list.cpp
+)
diff --git a/geobase/library/api/CMakeLists.linux-x86_64.txt b/geobase/library/api/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..40ecd7c759
--- /dev/null
+++ b/geobase/library/api/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,26 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(geobase-library-api)
+target_compile_options(geobase-library-api PRIVATE
+ $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>
+)
+target_link_libraries(geobase-library-api PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ geobase-library-abi
+)
+target_sources(geobase-library-api PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/library/lookup.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/lookup_wrapper.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/service_getter.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/timezone_getter.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/known_tz_list.cpp
+)
diff --git a/geobase/library/api/CMakeLists.txt b/geobase/library/api/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/geobase/library/api/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/geobase/library/api/CMakeLists.windows-x86_64.txt b/geobase/library/api/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..e7d1812621
--- /dev/null
+++ b/geobase/library/api/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,25 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(geobase-library-api)
+target_compile_options(geobase-library-api PRIVATE
+ $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>
+)
+target_link_libraries(geobase-library-api PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ geobase-library-abi
+)
+target_sources(geobase-library-api PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/library/lookup.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/lookup_wrapper.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/service_getter.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/timezone_getter.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/library/known_tz_list.cpp
+)
diff --git a/geobase/library/city_id_calc/CMakeLists.darwin-x86_64.txt b/geobase/library/city_id_calc/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..5859e8c377
--- /dev/null
+++ b/geobase/library/city_id_calc/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(geobase-library-city_id_calc)
+target_compile_options(geobase-library-city_id_calc PRIVATE
+ $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>
+)
+target_link_libraries(geobase-library-city_id_calc PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+)
+target_sources(geobase-library-city_id_calc PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/library/city_id_calc/city_id_base.cpp
+)
diff --git a/geobase/library/city_id_calc/CMakeLists.linux-aarch64.txt b/geobase/library/city_id_calc/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..799c117c73
--- /dev/null
+++ b/geobase/library/city_id_calc/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,21 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(geobase-library-city_id_calc)
+target_compile_options(geobase-library-city_id_calc PRIVATE
+ $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>
+)
+target_link_libraries(geobase-library-city_id_calc PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+)
+target_sources(geobase-library-city_id_calc PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/library/city_id_calc/city_id_base.cpp
+)
diff --git a/geobase/library/city_id_calc/CMakeLists.linux-x86_64.txt b/geobase/library/city_id_calc/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..799c117c73
--- /dev/null
+++ b/geobase/library/city_id_calc/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,21 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(geobase-library-city_id_calc)
+target_compile_options(geobase-library-city_id_calc PRIVATE
+ $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>
+)
+target_link_libraries(geobase-library-city_id_calc PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+)
+target_sources(geobase-library-city_id_calc PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/library/city_id_calc/city_id_base.cpp
+)
diff --git a/geobase/library/city_id_calc/CMakeLists.txt b/geobase/library/city_id_calc/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/geobase/library/city_id_calc/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/geobase/library/city_id_calc/CMakeLists.windows-x86_64.txt b/geobase/library/city_id_calc/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..5859e8c377
--- /dev/null
+++ b/geobase/library/city_id_calc/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(geobase-library-city_id_calc)
+target_compile_options(geobase-library-city_id_calc PRIVATE
+ $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>
+)
+target_link_libraries(geobase-library-city_id_calc PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+)
+target_sources(geobase-library-city_id_calc PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/library/city_id_calc/city_id_base.cpp
+)
diff --git a/geobase/library/db/CMakeLists.txt b/geobase/library/db/CMakeLists.txt
new file mode 100644
index 0000000000..70fc7a172d
--- /dev/null
+++ b/geobase/library/db/CMakeLists.txt
@@ -0,0 +1,9 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(stub)
diff --git a/geobase/library/db/stub/CMakeLists.darwin-x86_64.txt b/geobase/library/db/stub/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..4960807960
--- /dev/null
+++ b/geobase/library/db/stub/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,21 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(library-db-stub)
+target_compile_options(library-db-stub PRIVATE
+ $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>
+)
+target_link_libraries(library-db-stub PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_sources(library-db-stub PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/library/db/stub/db_resources_internals_stub.cpp
+)
diff --git a/geobase/library/db/stub/CMakeLists.linux-aarch64.txt b/geobase/library/db/stub/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..4c10cc50ef
--- /dev/null
+++ b/geobase/library/db/stub/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,22 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(library-db-stub)
+target_compile_options(library-db-stub PRIVATE
+ $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>
+)
+target_link_libraries(library-db-stub PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_sources(library-db-stub PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/library/db/stub/db_resources_internals_stub.cpp
+)
diff --git a/geobase/library/db/stub/CMakeLists.linux-x86_64.txt b/geobase/library/db/stub/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..4c10cc50ef
--- /dev/null
+++ b/geobase/library/db/stub/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,22 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(library-db-stub)
+target_compile_options(library-db-stub PRIVATE
+ $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>
+)
+target_link_libraries(library-db-stub PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_sources(library-db-stub PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/library/db/stub/db_resources_internals_stub.cpp
+)
diff --git a/geobase/library/db/stub/CMakeLists.txt b/geobase/library/db/stub/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/geobase/library/db/stub/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/geobase/library/db/stub/CMakeLists.windows-x86_64.txt b/geobase/library/db/stub/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..4960807960
--- /dev/null
+++ b/geobase/library/db/stub/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,21 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(library-db-stub)
+target_compile_options(library-db-stub PRIVATE
+ $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>
+)
+target_link_libraries(library-db-stub PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_sources(library-db-stub PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/library/db/stub/db_resources_internals_stub.cpp
+)
diff --git a/geobase/library/dispute_regs/CMakeLists.darwin-x86_64.txt b/geobase/library/dispute_regs/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..30cef124f7
--- /dev/null
+++ b/geobase/library/dispute_regs/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,24 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(proto)
+add_subdirectory(resource)
+
+add_library(geobase-library-dispute_regs)
+target_link_libraries(geobase-library-dispute_regs PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-dispute_regs-proto
+ library-dispute_regs-resource
+ cpp-protobuf-json
+ library-cpp-json
+ library-cpp-resource
+)
+target_sources(geobase-library-dispute_regs PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/library/dispute_regs/config.cpp
+)
diff --git a/geobase/library/dispute_regs/CMakeLists.linux-aarch64.txt b/geobase/library/dispute_regs/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..769eac8567
--- /dev/null
+++ b/geobase/library/dispute_regs/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,25 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(proto)
+add_subdirectory(resource)
+
+add_library(geobase-library-dispute_regs)
+target_link_libraries(geobase-library-dispute_regs PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-dispute_regs-proto
+ library-dispute_regs-resource
+ cpp-protobuf-json
+ library-cpp-json
+ library-cpp-resource
+)
+target_sources(geobase-library-dispute_regs PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/library/dispute_regs/config.cpp
+)
diff --git a/geobase/library/dispute_regs/CMakeLists.linux-x86_64.txt b/geobase/library/dispute_regs/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..769eac8567
--- /dev/null
+++ b/geobase/library/dispute_regs/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,25 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(proto)
+add_subdirectory(resource)
+
+add_library(geobase-library-dispute_regs)
+target_link_libraries(geobase-library-dispute_regs PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-dispute_regs-proto
+ library-dispute_regs-resource
+ cpp-protobuf-json
+ library-cpp-json
+ library-cpp-resource
+)
+target_sources(geobase-library-dispute_regs PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/library/dispute_regs/config.cpp
+)
diff --git a/geobase/library/dispute_regs/CMakeLists.txt b/geobase/library/dispute_regs/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/geobase/library/dispute_regs/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/geobase/library/dispute_regs/CMakeLists.windows-x86_64.txt b/geobase/library/dispute_regs/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..30cef124f7
--- /dev/null
+++ b/geobase/library/dispute_regs/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,24 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(proto)
+add_subdirectory(resource)
+
+add_library(geobase-library-dispute_regs)
+target_link_libraries(geobase-library-dispute_regs PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-dispute_regs-proto
+ library-dispute_regs-resource
+ cpp-protobuf-json
+ library-cpp-json
+ library-cpp-resource
+)
+target_sources(geobase-library-dispute_regs PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/library/dispute_regs/config.cpp
+)
diff --git a/geobase/library/dispute_regs/proto/CMakeLists.darwin-x86_64.txt b/geobase/library/dispute_regs/proto/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..d2fb124680
--- /dev/null
+++ b/geobase/library/dispute_regs/proto/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,43 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(library-dispute_regs-proto)
+target_link_libraries(library-dispute_regs-proto PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(library-dispute_regs-proto PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/library/dispute_regs/proto/config.proto
+)
+target_proto_addincls(library-dispute_regs-proto
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(library-dispute_regs-proto
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/geobase/library/dispute_regs/proto/CMakeLists.linux-aarch64.txt b/geobase/library/dispute_regs/proto/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..998b76e8c7
--- /dev/null
+++ b/geobase/library/dispute_regs/proto/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,44 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(library-dispute_regs-proto)
+target_link_libraries(library-dispute_regs-proto PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(library-dispute_regs-proto PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/library/dispute_regs/proto/config.proto
+)
+target_proto_addincls(library-dispute_regs-proto
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(library-dispute_regs-proto
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/geobase/library/dispute_regs/proto/CMakeLists.linux-x86_64.txt b/geobase/library/dispute_regs/proto/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..998b76e8c7
--- /dev/null
+++ b/geobase/library/dispute_regs/proto/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,44 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(library-dispute_regs-proto)
+target_link_libraries(library-dispute_regs-proto PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(library-dispute_regs-proto PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/library/dispute_regs/proto/config.proto
+)
+target_proto_addincls(library-dispute_regs-proto
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(library-dispute_regs-proto
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/geobase/library/dispute_regs/proto/CMakeLists.txt b/geobase/library/dispute_regs/proto/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/geobase/library/dispute_regs/proto/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/geobase/library/dispute_regs/proto/CMakeLists.windows-x86_64.txt b/geobase/library/dispute_regs/proto/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..d2fb124680
--- /dev/null
+++ b/geobase/library/dispute_regs/proto/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,43 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(library-dispute_regs-proto)
+target_link_libraries(library-dispute_regs-proto PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(library-dispute_regs-proto PRIVATE
+ ${CMAKE_SOURCE_DIR}/geobase/library/dispute_regs/proto/config.proto
+)
+target_proto_addincls(library-dispute_regs-proto
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(library-dispute_regs-proto
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/geobase/library/dispute_regs/resource/CMakeLists.darwin-x86_64.txt b/geobase/library/dispute_regs/resource/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..76aa5d01ef
--- /dev/null
+++ b/geobase/library/dispute_regs/resource/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,38 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_rescompiler_bin
+ TOOL_rescompiler_dependency
+ tools/rescompiler/bin
+ rescompiler
+)
+
+add_library(library-dispute_regs-resource INTERFACE)
+target_link_libraries(library-dispute_regs-resource INTERFACE
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-resource
+)
+
+add_global_library_for(library-dispute_regs-resource.global library-dispute_regs-resource)
+target_link_libraries(library-dispute_regs-resource.global PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-resource
+)
+target_sources(library-dispute_regs-resource.global PRIVATE
+ ${CMAKE_BINARY_DIR}/geobase/library/dispute_regs/resource/06b2d3b23dce96e1619d2b53d6c947ec.cpp
+)
+resources(library-dispute_regs-resource.global
+ ${CMAKE_BINARY_DIR}/geobase/library/dispute_regs/resource/06b2d3b23dce96e1619d2b53d6c947ec.cpp
+ INPUTS
+ ${CMAKE_SOURCE_DIR}/geobase/library/dispute_regs/resource/config.json
+ KEYS
+ /geobase/dispute-config
+)
diff --git a/geobase/library/dispute_regs/resource/CMakeLists.linux-aarch64.txt b/geobase/library/dispute_regs/resource/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..7587a04962
--- /dev/null
+++ b/geobase/library/dispute_regs/resource/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,40 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(  # project helper defined outside this file; presumably locates the built rescompiler tool - TODO confirm
+ TOOL_rescompiler_bin
+ TOOL_rescompiler_dependency
+ tools/rescompiler/bin
+ rescompiler
+)
+# INTERFACE library: it has no sources of its own and only forwards the dependencies listed below to consumers.
+add_library(library-dispute_regs-resource INTERFACE)
+target_link_libraries(library-dispute_regs-resource INTERFACE
+ contrib-libs-linux-headers  # listed only in the Linux variants of this file
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-resource
+)
+# Companion ".global" target created by a project helper; it is the target that receives the generated resource source.
+add_global_library_for(library-dispute_regs-resource.global library-dispute_regs-resource)
+target_link_libraries(library-dispute_regs-resource.global PUBLIC
+ contrib-libs-linux-headers  # listed only in the Linux variants of this file
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-resource
+)
+target_sources(library-dispute_regs-resource.global PRIVATE  # the .cpp is expected in the build tree, not the source tree
+ ${CMAKE_BINARY_DIR}/geobase/library/dispute_regs/resource/06b2d3b23dce96e1619d2b53d6c947ec.cpp
+)
+resources(library-dispute_regs-resource.global  # project helper; presumably generates the .cpp below from INPUTS - TODO confirm
+ ${CMAKE_BINARY_DIR}/geobase/library/dispute_regs/resource/06b2d3b23dce96e1619d2b53d6c947ec.cpp
+ INPUTS
+ ${CMAKE_SOURCE_DIR}/geobase/library/dispute_regs/resource/config.json
+ KEYS
+ /geobase/dispute-config  # presumably the lookup key under which config.json is registered - verify against resources()
+)
diff --git a/geobase/library/dispute_regs/resource/CMakeLists.linux-x86_64.txt b/geobase/library/dispute_regs/resource/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..7587a04962
--- /dev/null
+++ b/geobase/library/dispute_regs/resource/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,40 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(  # project helper defined outside this file; presumably locates the built rescompiler tool - TODO confirm
+ TOOL_rescompiler_bin
+ TOOL_rescompiler_dependency
+ tools/rescompiler/bin
+ rescompiler
+)
+# INTERFACE library: it has no sources of its own and only forwards the dependencies listed below to consumers.
+add_library(library-dispute_regs-resource INTERFACE)
+target_link_libraries(library-dispute_regs-resource INTERFACE
+ contrib-libs-linux-headers  # listed only in the Linux variants of this file
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-resource
+)
+# Companion ".global" target created by a project helper; it is the target that receives the generated resource source.
+add_global_library_for(library-dispute_regs-resource.global library-dispute_regs-resource)
+target_link_libraries(library-dispute_regs-resource.global PUBLIC
+ contrib-libs-linux-headers  # listed only in the Linux variants of this file
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-resource
+)
+target_sources(library-dispute_regs-resource.global PRIVATE  # the .cpp is expected in the build tree, not the source tree
+ ${CMAKE_BINARY_DIR}/geobase/library/dispute_regs/resource/06b2d3b23dce96e1619d2b53d6c947ec.cpp
+)
+resources(library-dispute_regs-resource.global  # project helper; presumably generates the .cpp below from INPUTS - TODO confirm
+ ${CMAKE_BINARY_DIR}/geobase/library/dispute_regs/resource/06b2d3b23dce96e1619d2b53d6c947ec.cpp
+ INPUTS
+ ${CMAKE_SOURCE_DIR}/geobase/library/dispute_regs/resource/config.json
+ KEYS
+ /geobase/dispute-config  # presumably the lookup key under which config.json is registered - verify against resources()
+)
diff --git a/geobase/library/dispute_regs/resource/CMakeLists.txt b/geobase/library/dispute_regs/resource/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/geobase/library/dispute_regs/resource/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)  # at most one per-platform file is included
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")  # the only branch without a HAVE_CUDA check
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()  # no else(): any other platform/CUDA combination includes nothing, so this directory defines no targets
diff --git a/geobase/library/dispute_regs/resource/CMakeLists.windows-x86_64.txt b/geobase/library/dispute_regs/resource/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..76aa5d01ef
--- /dev/null
+++ b/geobase/library/dispute_regs/resource/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,38 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(  # project helper defined outside this file; presumably locates the built rescompiler tool - TODO confirm
+ TOOL_rescompiler_bin
+ TOOL_rescompiler_dependency
+ tools/rescompiler/bin
+ rescompiler
+)
+# INTERFACE library: it has no sources of its own and only forwards the dependencies listed below to consumers.
+add_library(library-dispute_regs-resource INTERFACE)
+target_link_libraries(library-dispute_regs-resource INTERFACE
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-resource
+)
+# Companion ".global" target created by a project helper; it is the target that receives the generated resource source.
+add_global_library_for(library-dispute_regs-resource.global library-dispute_regs-resource)
+target_link_libraries(library-dispute_regs-resource.global PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-resource
+)
+target_sources(library-dispute_regs-resource.global PRIVATE  # the .cpp is expected in the build tree, not the source tree
+ ${CMAKE_BINARY_DIR}/geobase/library/dispute_regs/resource/06b2d3b23dce96e1619d2b53d6c947ec.cpp
+)
+resources(library-dispute_regs-resource.global  # project helper; presumably generates the .cpp below from INPUTS - TODO confirm
+ ${CMAKE_BINARY_DIR}/geobase/library/dispute_regs/resource/06b2d3b23dce96e1619d2b53d6c947ec.cpp
+ INPUTS
+ ${CMAKE_SOURCE_DIR}/geobase/library/dispute_regs/resource/config.json
+ KEYS
+ /geobase/dispute-config  # presumably the lookup key under which config.json is registered - verify against resources()
+)
diff --git a/geobase/library/utils/CMakeLists.darwin-x86_64.txt b/geobase/library/utils/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..41b801b27f
--- /dev/null
+++ b/geobase/library/utils/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,23 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+find_package(OpenSSL REQUIRED)  # REQUIRED: configuration stops with an error if OpenSSL is not found
+# NOTE(review): CMake's bundled FindOpenSSL module exports OpenSSL::SSL / OpenSSL::Crypto; OpenSSL::OpenSSL presumably comes from a package-manager-generated config - confirm.
+add_library(geobase-library-utils)
+target_compile_options(geobase-library-utils PRIVATE
+ $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>  # MSVC: no extra flag; any other compiler: -Wno-everything
+)
+target_link_libraries(geobase-library-utils PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ OpenSSL::OpenSSL
+ contrib-libs-protobuf
+)
+target_sources(geobase-library-utils PRIVATE  # single source file listed for this target here
+ ${CMAKE_SOURCE_DIR}/geobase/library/utils/builder_utils.cpp
+)
diff --git a/geobase/library/utils/CMakeLists.linux-aarch64.txt b/geobase/library/utils/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..b6478526df
--- /dev/null
+++ b/geobase/library/utils/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,24 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+find_package(OpenSSL REQUIRED)  # REQUIRED: configuration stops with an error if OpenSSL is not found
+# NOTE(review): CMake's bundled FindOpenSSL module exports OpenSSL::SSL / OpenSSL::Crypto; OpenSSL::OpenSSL presumably comes from a package-manager-generated config - confirm.
+add_library(geobase-library-utils)
+target_compile_options(geobase-library-utils PRIVATE
+ $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>  # MSVC: no extra flag; any other compiler: -Wno-everything
+)
+target_link_libraries(geobase-library-utils PUBLIC
+ contrib-libs-linux-headers  # listed only in the Linux variants of this file
+ contrib-libs-cxxsupp
+ yutil
+ OpenSSL::OpenSSL
+ contrib-libs-protobuf
+)
+target_sources(geobase-library-utils PRIVATE  # single source file listed for this target here
+ ${CMAKE_SOURCE_DIR}/geobase/library/utils/builder_utils.cpp
+)
diff --git a/geobase/library/utils/CMakeLists.linux-x86_64.txt b/geobase/library/utils/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..b6478526df
--- /dev/null
+++ b/geobase/library/utils/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,24 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+find_package(OpenSSL REQUIRED)  # REQUIRED: configuration stops with an error if OpenSSL is not found
+# NOTE(review): CMake's bundled FindOpenSSL module exports OpenSSL::SSL / OpenSSL::Crypto; OpenSSL::OpenSSL presumably comes from a package-manager-generated config - confirm.
+add_library(geobase-library-utils)
+target_compile_options(geobase-library-utils PRIVATE
+ $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>  # MSVC: no extra flag; any other compiler: -Wno-everything
+)
+target_link_libraries(geobase-library-utils PUBLIC
+ contrib-libs-linux-headers  # listed only in the Linux variants of this file
+ contrib-libs-cxxsupp
+ yutil
+ OpenSSL::OpenSSL
+ contrib-libs-protobuf
+)
+target_sources(geobase-library-utils PRIVATE  # single source file listed for this target here
+ ${CMAKE_SOURCE_DIR}/geobase/library/utils/builder_utils.cpp
+)
diff --git a/geobase/library/utils/CMakeLists.txt b/geobase/library/utils/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/geobase/library/utils/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)  # at most one per-platform file is included
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")  # the only branch without a HAVE_CUDA check
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()  # no else(): any other platform/CUDA combination includes nothing, so this directory defines no targets
diff --git a/geobase/library/utils/CMakeLists.windows-x86_64.txt b/geobase/library/utils/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..41b801b27f
--- /dev/null
+++ b/geobase/library/utils/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,23 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+find_package(OpenSSL REQUIRED)  # REQUIRED: configuration stops with an error if OpenSSL is not found
+# NOTE(review): CMake's bundled FindOpenSSL module exports OpenSSL::SSL / OpenSSL::Crypto; OpenSSL::OpenSSL presumably comes from a package-manager-generated config - confirm.
+add_library(geobase-library-utils)
+target_compile_options(geobase-library-utils PRIVATE
+ $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>  # MSVC: no extra flag; any other compiler: -Wno-everything
+)
+target_link_libraries(geobase-library-utils PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ OpenSSL::OpenSSL
+ contrib-libs-protobuf
+)
+target_sources(geobase-library-utils PRIVATE  # single source file listed for this target here
+ ${CMAKE_SOURCE_DIR}/geobase/library/utils/builder_utils.cpp
+)
diff --git a/geobase/user-settings/CMakeLists.darwin-x86_64.txt b/geobase/user-settings/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..ebb26215eb
--- /dev/null
+++ b/geobase/user-settings/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,31 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(geobase-user-settings)  # no STATIC/SHARED keyword, so the library type follows BUILD_SHARED_LIBS
+target_compile_options(geobase-user-settings PRIVATE
+ $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>  # MSVC: no extra flag; any other compiler: -Wno-everything
+)
+target_include_directories(geobase-user-settings PRIVATE  # PRIVATE: this include path is not propagated to consumers
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/include
+)
+target_link_libraries(geobase-user-settings PUBLIC  # PUBLIC: consumers of this library inherit these dependencies
+ contrib-libs-cxxsupp
+ yutil
+ cpp-string_utils-base64
+)
+target_sources(geobase-user-settings PRIVATE  # explicit source list; all files come from the library/ subdirectory
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/geo_point.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/gp_container.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/gpauto_container.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/util.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/y_cookie.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/ygo_container.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/yp_cookie.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/ys_cookie.cpp
+)
diff --git a/geobase/user-settings/CMakeLists.linux-aarch64.txt b/geobase/user-settings/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..5a32556987
--- /dev/null
+++ b/geobase/user-settings/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,32 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(geobase-user-settings)  # no STATIC/SHARED keyword, so the library type follows BUILD_SHARED_LIBS
+target_compile_options(geobase-user-settings PRIVATE
+ $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>  # MSVC: no extra flag; any other compiler: -Wno-everything
+)
+target_include_directories(geobase-user-settings PRIVATE  # PRIVATE: this include path is not propagated to consumers
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/include
+)
+target_link_libraries(geobase-user-settings PUBLIC  # PUBLIC: consumers of this library inherit these dependencies
+ contrib-libs-linux-headers  # listed only in the Linux variants of this file
+ contrib-libs-cxxsupp
+ yutil
+ cpp-string_utils-base64
+)
+target_sources(geobase-user-settings PRIVATE  # explicit source list; all files come from the library/ subdirectory
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/geo_point.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/gp_container.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/gpauto_container.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/util.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/y_cookie.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/ygo_container.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/yp_cookie.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/ys_cookie.cpp
+)
diff --git a/geobase/user-settings/CMakeLists.linux-x86_64.txt b/geobase/user-settings/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..5a32556987
--- /dev/null
+++ b/geobase/user-settings/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,32 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(geobase-user-settings)  # no STATIC/SHARED keyword, so the library type follows BUILD_SHARED_LIBS
+target_compile_options(geobase-user-settings PRIVATE
+ $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>  # MSVC: no extra flag; any other compiler: -Wno-everything
+)
+target_include_directories(geobase-user-settings PRIVATE  # PRIVATE: this include path is not propagated to consumers
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/include
+)
+target_link_libraries(geobase-user-settings PUBLIC  # PUBLIC: consumers of this library inherit these dependencies
+ contrib-libs-linux-headers  # listed only in the Linux variants of this file
+ contrib-libs-cxxsupp
+ yutil
+ cpp-string_utils-base64
+)
+target_sources(geobase-user-settings PRIVATE  # explicit source list; all files come from the library/ subdirectory
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/geo_point.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/gp_container.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/gpauto_container.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/util.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/y_cookie.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/ygo_container.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/yp_cookie.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/ys_cookie.cpp
+)
diff --git a/geobase/user-settings/CMakeLists.txt b/geobase/user-settings/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/geobase/user-settings/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)  # at most one per-platform file is included
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")  # the only branch without a HAVE_CUDA check
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()  # no else(): any other platform/CUDA combination includes nothing, so this directory defines no targets
diff --git a/geobase/user-settings/CMakeLists.windows-x86_64.txt b/geobase/user-settings/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..ebb26215eb
--- /dev/null
+++ b/geobase/user-settings/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,31 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(geobase-user-settings)  # no STATIC/SHARED keyword, so the library type follows BUILD_SHARED_LIBS
+target_compile_options(geobase-user-settings PRIVATE
+ $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>  # MSVC: no extra flag; any other compiler: -Wno-everything
+)
+target_include_directories(geobase-user-settings PRIVATE  # PRIVATE: this include path is not propagated to consumers
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/include
+)
+target_link_libraries(geobase-user-settings PUBLIC  # PUBLIC: consumers of this library inherit these dependencies
+ contrib-libs-cxxsupp
+ yutil
+ cpp-string_utils-base64
+)
+target_sources(geobase-user-settings PRIVATE  # explicit source list; all files come from the library/ subdirectory
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/geo_point.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/gp_container.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/gpauto_container.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/util.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/y_cookie.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/ygo_container.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/yp_cookie.cpp
+ ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/ys_cookie.cpp
+)
diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt
new file mode 100644
index 0000000000..090bc525e7
--- /dev/null
+++ b/kernel/CMakeLists.txt
@@ -0,0 +1,16 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(blogs)  # each add_subdirectory() call processes the CMakeLists.txt of that child directory of kernel/
+add_subdirectory(hosts)
+add_subdirectory(indexann)
+add_subdirectory(langregion)
+add_subdirectory(mango)
+add_subdirectory(multilanguage_hosts)
+add_subdirectory(search_zone)
+add_subdirectory(urlnorm)
diff --git a/kernel/blogs/CMakeLists.txt b/kernel/blogs/CMakeLists.txt
new file mode 100644
index 0000000000..6d580ae9ad
--- /dev/null
+++ b/kernel/blogs/CMakeLists.txt
@@ -0,0 +1,9 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(protos)  # kernel/blogs defines no targets itself; it only descends into protos/
diff --git a/kernel/blogs/protos/CMakeLists.darwin-x86_64.txt b/kernel/blogs/protos/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..8b38d6e6de
--- /dev/null
+++ b/kernel/blogs/protos/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,56 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(  # project helper defined outside this file; presumably locates the built protoc tool - TODO confirm
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(  # same helper, called for the cpp_styleguide protoc plugin
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(  # NOTE(review): verbatim repeat of the protoc lookup above - looks redundant; confirm it is harmless
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(  # NOTE(review): verbatim repeat of the cpp_styleguide lookup above
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+# Library target for blogs.proto / rss.proto; the target_proto_* commands are project helpers defined outside this file.
+add_library(kernel-blogs-protos)
+target_link_libraries(kernel-blogs-protos PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(kernel-blogs-protos PRIVATE  # the .proto inputs attached to this target
+ ${CMAKE_SOURCE_DIR}/kernel/blogs/protos/blogs.proto
+ ${CMAKE_SOURCE_DIR}/kernel/blogs/protos/rss.proto
+)
+target_proto_addincls(kernel-blogs-protos  # presumably the import search paths handed to protoc - TODO confirm
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}  # repeats an entry already listed above
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src  # repeats an entry already listed above
+)
+target_proto_outs(kernel-blogs-protos  # presumably extra protoc arguments, one output option per generator - TODO confirm
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/kernel/blogs/protos/CMakeLists.linux-aarch64.txt b/kernel/blogs/protos/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..c60cbb659f
--- /dev/null
+++ b/kernel/blogs/protos/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,57 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(  # project helper defined outside this file; presumably locates the built protoc tool - TODO confirm
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(  # same helper, called for the cpp_styleguide protoc plugin
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(  # NOTE(review): verbatim repeat of the protoc lookup above - looks redundant; confirm it is harmless
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(  # NOTE(review): verbatim repeat of the cpp_styleguide lookup above
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+# Library target for blogs.proto / rss.proto; the target_proto_* commands are project helpers defined outside this file.
+add_library(kernel-blogs-protos)
+target_link_libraries(kernel-blogs-protos PUBLIC
+ contrib-libs-linux-headers  # listed only in the Linux variants of this file
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(kernel-blogs-protos PRIVATE  # the .proto inputs attached to this target
+ ${CMAKE_SOURCE_DIR}/kernel/blogs/protos/blogs.proto
+ ${CMAKE_SOURCE_DIR}/kernel/blogs/protos/rss.proto
+)
+target_proto_addincls(kernel-blogs-protos  # presumably the import search paths handed to protoc - TODO confirm
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}  # repeats an entry already listed above
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src  # repeats an entry already listed above
+)
+target_proto_outs(kernel-blogs-protos  # presumably extra protoc arguments, one output option per generator - TODO confirm
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/kernel/blogs/protos/CMakeLists.linux-x86_64.txt b/kernel/blogs/protos/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..c60cbb659f
--- /dev/null
+++ b/kernel/blogs/protos/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,57 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(  # project helper defined outside this file; presumably locates the built protoc tool - TODO confirm
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(  # same helper, called for the cpp_styleguide protoc plugin
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(  # NOTE(review): verbatim repeat of the protoc lookup above - looks redundant; confirm it is harmless
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(  # NOTE(review): verbatim repeat of the cpp_styleguide lookup above
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+# Library target for blogs.proto / rss.proto; the target_proto_* commands are project helpers defined outside this file.
+add_library(kernel-blogs-protos)
+target_link_libraries(kernel-blogs-protos PUBLIC
+ contrib-libs-linux-headers  # listed only in the Linux variants of this file
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(kernel-blogs-protos PRIVATE  # the .proto inputs attached to this target
+ ${CMAKE_SOURCE_DIR}/kernel/blogs/protos/blogs.proto
+ ${CMAKE_SOURCE_DIR}/kernel/blogs/protos/rss.proto
+)
+target_proto_addincls(kernel-blogs-protos  # presumably the import search paths handed to protoc - TODO confirm
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}  # repeats an entry already listed above
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src  # repeats an entry already listed above
+)
+target_proto_outs(kernel-blogs-protos  # presumably extra protoc arguments, one output option per generator - TODO confirm
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/kernel/blogs/protos/CMakeLists.txt b/kernel/blogs/protos/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/kernel/blogs/protos/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)  # at most one per-platform file is included
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")  # the only branch without a HAVE_CUDA check
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()  # no else(): any other platform/CUDA combination includes nothing, so this directory defines no targets
diff --git a/kernel/blogs/protos/CMakeLists.windows-x86_64.txt b/kernel/blogs/protos/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..8b38d6e6de
--- /dev/null
+++ b/kernel/blogs/protos/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,56 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(  # project helper defined outside this file; presumably locates the built protoc tool - TODO confirm
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(  # same helper, called for the cpp_styleguide protoc plugin
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(  # NOTE(review): verbatim repeat of the protoc lookup above - looks redundant; confirm it is harmless
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(  # NOTE(review): verbatim repeat of the cpp_styleguide lookup above
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+# Library target for blogs.proto / rss.proto; the target_proto_* commands are project helpers defined outside this file.
+add_library(kernel-blogs-protos)
+target_link_libraries(kernel-blogs-protos PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(kernel-blogs-protos PRIVATE  # the .proto inputs attached to this target
+ ${CMAKE_SOURCE_DIR}/kernel/blogs/protos/blogs.proto
+ ${CMAKE_SOURCE_DIR}/kernel/blogs/protos/rss.proto
+)
+target_proto_addincls(kernel-blogs-protos  # presumably the import search paths handed to protoc - TODO confirm
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}  # repeats an entry already listed above
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src  # repeats an entry already listed above
+)
+target_proto_outs(kernel-blogs-protos  # presumably extra protoc arguments, one output option per generator - TODO confirm
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/kernel/hosts/CMakeLists.txt b/kernel/hosts/CMakeLists.txt
new file mode 100644
index 0000000000..516c4594e0
--- /dev/null
+++ b/kernel/hosts/CMakeLists.txt
@@ -0,0 +1,9 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(owner)  # kernel/hosts defines no targets itself; it only descends into owner/
diff --git a/kernel/hosts/owner/CMakeLists.darwin-x86_64.txt b/kernel/hosts/owner/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..cb414f86b7
--- /dev/null
+++ b/kernel/hosts/owner/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,53 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(  # project helper defined outside this file
+ TOOL_archiver_bin  # read below as ${TOOL_archiver_bin}: both the command that is run and a dependency of the rule
+ TOOL_archiver_dependency
+ tools/archiver
+ archiver
+)
+# Library built from owner.cpp plus urlrules.inc, which is generated into the build tree by the custom command below.
+add_library(kernel-hosts-owner)
+target_include_directories(kernel-hosts-owner PUBLIC  # the build-tree directory that receives urlrules.inc
+ ${CMAKE_BINARY_DIR}/kernel/hosts/owner
+)
+target_link_libraries(kernel-hosts-owner PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-archive
+ cpp-containers-str_hash
+ cpp-string_utils-url
+)
+target_sources(kernel-hosts-owner PRIVATE
+ ${CMAKE_SOURCE_DIR}/kernel/hosts/owner/owner.cpp
+ ${CMAKE_BINARY_DIR}/kernel/hosts/owner/urlrules.inc  # generated file; listing it here ties the rule below to this target
+)
+add_custom_command(  # NOTE(review): no VERBATIM keyword, so argument escaping follows CMake's platform-dependent legacy rules
+ OUTPUT
+ ${CMAKE_BINARY_DIR}/kernel/hosts/owner/urlrules.inc
+ DEPENDS  # the rule re-runs when the tool or any of the three list files changes
+ ${TOOL_archiver_bin}
+ ${CMAKE_SOURCE_DIR}/yweb/urlrules/areas.lst
+ ${CMAKE_SOURCE_DIR}/yweb/urlrules/2ld.list
+ ${CMAKE_SOURCE_DIR}/yweb/urlrules/ungrouped.list
+ COMMAND
+ ${TOOL_archiver_bin}
+ -q
+ -x
+ ${CMAKE_SOURCE_DIR}/yweb/urlrules/areas.lst:  # the trailing ':' is part of the argument; its meaning to archiver is not visible here
+ ${CMAKE_SOURCE_DIR}/yweb/urlrules/2ld.list:
+ ${CMAKE_SOURCE_DIR}/yweb/urlrules/ungrouped.list:
+ -o
+ ${CMAKE_BINARY_DIR}/kernel/hosts/owner/urlrules.inc
+)
+if(NOT CMAKE_CROSSCOMPILING)  # native builds only: make the archiver target build before this library
+ add_dependencies(kernel-hosts-owner
+ archiver
+)
+endif()
diff --git a/kernel/hosts/owner/CMakeLists.linux-aarch64.txt b/kernel/hosts/owner/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..7421b7739c
--- /dev/null
+++ b/kernel/hosts/owner/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,50 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Locate the archiver tool under tools/archiver; the helper is given the names of its
+# two output variables (TOOL_archiver_bin, TOOL_archiver_dependency).
+get_built_tool_path(TOOL_archiver_bin TOOL_archiver_dependency tools/archiver archiver)
+
+# Build rule for urlrules.inc: run the archiver over the three yweb/urlrules lists.
+# NOTE(review): each input carries a trailing ':' -- presumably the archiver's
+# "path:key" syntax with an empty key; confirm against the tool.
+add_custom_command(
+  OUTPUT ${CMAKE_BINARY_DIR}/kernel/hosts/owner/urlrules.inc
+  DEPENDS
+    ${TOOL_archiver_bin}
+    ${CMAKE_SOURCE_DIR}/yweb/urlrules/areas.lst
+    ${CMAKE_SOURCE_DIR}/yweb/urlrules/2ld.list
+    ${CMAKE_SOURCE_DIR}/yweb/urlrules/ungrouped.list
+  COMMAND
+    ${TOOL_archiver_bin} -q -x
+    ${CMAKE_SOURCE_DIR}/yweb/urlrules/areas.lst:
+    ${CMAKE_SOURCE_DIR}/yweb/urlrules/2ld.list:
+    ${CMAKE_SOURCE_DIR}/yweb/urlrules/ungrouped.list:
+    -o ${CMAKE_BINARY_DIR}/kernel/hosts/owner/urlrules.inc
+)
+
+add_library(kernel-hosts-owner)
+# Sources: owner.cpp plus the generated urlrules.inc (listing it ties the rule above to the target).
+target_sources(kernel-hosts-owner PRIVATE
+  ${CMAKE_SOURCE_DIR}/kernel/hosts/owner/owner.cpp
+  ${CMAKE_BINARY_DIR}/kernel/hosts/owner/urlrules.inc
+)
+target_include_directories(kernel-hosts-owner PUBLIC ${CMAKE_BINARY_DIR}/kernel/hosts/owner)
+target_link_libraries(kernel-hosts-owner PUBLIC
+  contrib-libs-linux-headers
+  contrib-libs-cxxsupp
+  yutil
+  library-cpp-archive
+  cpp-containers-str_hash
+  cpp-string_utils-url
+)
+
+# Native (non-cross) builds only: order the archiver target before this library.
+if(NOT CMAKE_CROSSCOMPILING)
+  add_dependencies(kernel-hosts-owner archiver)
+endif()
diff --git a/kernel/hosts/owner/CMakeLists.linux-x86_64.txt b/kernel/hosts/owner/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..7421b7739c
--- /dev/null
+++ b/kernel/hosts/owner/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,50 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Locate the archiver tool under tools/archiver; the helper is given the names of its
+# two output variables (TOOL_archiver_bin, TOOL_archiver_dependency).
+get_built_tool_path(TOOL_archiver_bin TOOL_archiver_dependency tools/archiver archiver)
+
+# Build rule for urlrules.inc: run the archiver over the three yweb/urlrules lists.
+# NOTE(review): each input carries a trailing ':' -- presumably the archiver's
+# "path:key" syntax with an empty key; confirm against the tool.
+add_custom_command(
+  OUTPUT ${CMAKE_BINARY_DIR}/kernel/hosts/owner/urlrules.inc
+  DEPENDS
+    ${TOOL_archiver_bin}
+    ${CMAKE_SOURCE_DIR}/yweb/urlrules/areas.lst
+    ${CMAKE_SOURCE_DIR}/yweb/urlrules/2ld.list
+    ${CMAKE_SOURCE_DIR}/yweb/urlrules/ungrouped.list
+  COMMAND
+    ${TOOL_archiver_bin} -q -x
+    ${CMAKE_SOURCE_DIR}/yweb/urlrules/areas.lst:
+    ${CMAKE_SOURCE_DIR}/yweb/urlrules/2ld.list:
+    ${CMAKE_SOURCE_DIR}/yweb/urlrules/ungrouped.list:
+    -o ${CMAKE_BINARY_DIR}/kernel/hosts/owner/urlrules.inc
+)
+
+add_library(kernel-hosts-owner)
+# Sources: owner.cpp plus the generated urlrules.inc (listing it ties the rule above to the target).
+target_sources(kernel-hosts-owner PRIVATE
+  ${CMAKE_SOURCE_DIR}/kernel/hosts/owner/owner.cpp
+  ${CMAKE_BINARY_DIR}/kernel/hosts/owner/urlrules.inc
+)
+target_include_directories(kernel-hosts-owner PUBLIC ${CMAKE_BINARY_DIR}/kernel/hosts/owner)
+target_link_libraries(kernel-hosts-owner PUBLIC
+  contrib-libs-linux-headers
+  contrib-libs-cxxsupp
+  yutil
+  library-cpp-archive
+  cpp-containers-str_hash
+  cpp-string_utils-url
+)
+
+# Native (non-cross) builds only: order the archiver target before this library.
+if(NOT CMAKE_CROSSCOMPILING)
+  add_dependencies(kernel-hosts-owner archiver)
+endif()
diff --git a/kernel/hosts/owner/CMakeLists.txt b/kernel/hosts/owner/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/kernel/hosts/owner/CMakeLists.txt
@@ -0,0 +1,22 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Include the build description matching the target platform. Only one branch is
+# expected to match; when none does, nothing is included from this directory.
+if (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+  include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+  include(CMakeLists.darwin-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND NOT HAVE_CUDA)
+  # Linux builds are further split by processor architecture.
+  if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
+    include(CMakeLists.linux-aarch64.txt)
+  elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+    include(CMakeLists.linux-x86_64.txt)
+  endif()
+endif()
diff --git a/kernel/hosts/owner/CMakeLists.windows-x86_64.txt b/kernel/hosts/owner/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..cb414f86b7
--- /dev/null
+++ b/kernel/hosts/owner/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,49 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Locate the archiver tool under tools/archiver; the helper is given the names of its
+# two output variables (TOOL_archiver_bin, TOOL_archiver_dependency).
+get_built_tool_path(TOOL_archiver_bin TOOL_archiver_dependency tools/archiver archiver)
+
+# Build rule for urlrules.inc: run the archiver over the three yweb/urlrules lists.
+# NOTE(review): each input carries a trailing ':' -- presumably the archiver's
+# "path:key" syntax with an empty key; confirm against the tool.
+add_custom_command(
+  OUTPUT ${CMAKE_BINARY_DIR}/kernel/hosts/owner/urlrules.inc
+  DEPENDS
+    ${TOOL_archiver_bin}
+    ${CMAKE_SOURCE_DIR}/yweb/urlrules/areas.lst
+    ${CMAKE_SOURCE_DIR}/yweb/urlrules/2ld.list
+    ${CMAKE_SOURCE_DIR}/yweb/urlrules/ungrouped.list
+  COMMAND
+    ${TOOL_archiver_bin} -q -x
+    ${CMAKE_SOURCE_DIR}/yweb/urlrules/areas.lst:
+    ${CMAKE_SOURCE_DIR}/yweb/urlrules/2ld.list:
+    ${CMAKE_SOURCE_DIR}/yweb/urlrules/ungrouped.list:
+    -o ${CMAKE_BINARY_DIR}/kernel/hosts/owner/urlrules.inc
+)
+
+add_library(kernel-hosts-owner)
+# Sources: owner.cpp plus the generated urlrules.inc (listing it ties the rule above to the target).
+target_sources(kernel-hosts-owner PRIVATE
+  ${CMAKE_SOURCE_DIR}/kernel/hosts/owner/owner.cpp
+  ${CMAKE_BINARY_DIR}/kernel/hosts/owner/urlrules.inc
+)
+target_include_directories(kernel-hosts-owner PUBLIC ${CMAKE_BINARY_DIR}/kernel/hosts/owner)
+target_link_libraries(kernel-hosts-owner PUBLIC
+  contrib-libs-cxxsupp
+  yutil
+  library-cpp-archive
+  cpp-containers-str_hash
+  cpp-string_utils-url
+)
+
+# Native (non-cross) builds only: order the archiver target before this library.
+if(NOT CMAKE_CROSSCOMPILING)
+  add_dependencies(kernel-hosts-owner archiver)
+endif()
diff --git a/kernel/indexann/CMakeLists.txt b/kernel/indexann/CMakeLists.txt
new file mode 100644
index 0000000000..6d580ae9ad
--- /dev/null
+++ b/kernel/indexann/CMakeLists.txt
@@ -0,0 +1,9 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+# This directory defines no targets itself; it only descends into protos/.
+add_subdirectory(protos)
diff --git a/kernel/indexann/protos/CMakeLists.darwin-x86_64.txt b/kernel/indexann/protos/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..b9a657cc4a
--- /dev/null
+++ b/kernel/indexann/protos/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,48 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Code-generation tools: the protoc compiler and its cpp_styleguide plugin.
+# One lookup per tool is sufficient.
+get_built_tool_path(
+  TOOL_protoc_bin
+  TOOL_protoc_dependency
+  contrib/tools/protoc/bin
+  protoc
+)
+get_built_tool_path(
+  TOOL_cpp_styleguide_bin
+  TOOL_cpp_styleguide_dependency
+  contrib/tools/protoc/plugins/cpp_styleguide
+  cpp_styleguide
+)
+
+add_library(kernel-indexann-protos)
+target_link_libraries(kernel-indexann-protos PUBLIC
+  contrib-libs-cxxsupp
+  yutil
+  yt-interface-protos
+  contrib-libs-protobuf
+)
+# .proto files registered for this target through the project's target_proto_messages helper.
+target_proto_messages(kernel-indexann-protos PRIVATE
+  ${CMAKE_SOURCE_DIR}/kernel/indexann/protos/data.proto
+  ${CMAKE_SOURCE_DIR}/kernel/indexann/protos/portion.proto
+)
+# Include roots passed to target_proto_addincls; each distinct entry is listed once.
+target_proto_addincls(kernel-indexann-protos
+  ./
+  ${CMAKE_SOURCE_DIR}/
+  ${CMAKE_BINARY_DIR}
+  ${CMAKE_SOURCE_DIR}
+  ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+# Both generators (--cpp_out, --cpp_styleguide_out) write into the binary dir.
+target_proto_outs(kernel-indexann-protos
+  --cpp_out=${CMAKE_BINARY_DIR}/
+  --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/kernel/indexann/protos/CMakeLists.linux-aarch64.txt b/kernel/indexann/protos/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..2a0f142e64
--- /dev/null
+++ b/kernel/indexann/protos/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,49 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Code-generation tools: the protoc compiler and its cpp_styleguide plugin.
+# One lookup per tool is sufficient.
+get_built_tool_path(
+  TOOL_protoc_bin
+  TOOL_protoc_dependency
+  contrib/tools/protoc/bin
+  protoc
+)
+get_built_tool_path(
+  TOOL_cpp_styleguide_bin
+  TOOL_cpp_styleguide_dependency
+  contrib/tools/protoc/plugins/cpp_styleguide
+  cpp_styleguide
+)
+
+add_library(kernel-indexann-protos)
+target_link_libraries(kernel-indexann-protos PUBLIC
+  contrib-libs-linux-headers
+  contrib-libs-cxxsupp
+  yutil
+  yt-interface-protos
+  contrib-libs-protobuf
+)
+# .proto files registered for this target through the project's target_proto_messages helper.
+target_proto_messages(kernel-indexann-protos PRIVATE
+  ${CMAKE_SOURCE_DIR}/kernel/indexann/protos/data.proto
+  ${CMAKE_SOURCE_DIR}/kernel/indexann/protos/portion.proto
+)
+# Include roots passed to target_proto_addincls; each distinct entry is listed once.
+target_proto_addincls(kernel-indexann-protos
+  ./
+  ${CMAKE_SOURCE_DIR}/
+  ${CMAKE_BINARY_DIR}
+  ${CMAKE_SOURCE_DIR}
+  ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+# Both generators (--cpp_out, --cpp_styleguide_out) write into the binary dir.
+target_proto_outs(kernel-indexann-protos
+  --cpp_out=${CMAKE_BINARY_DIR}/
+  --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/kernel/indexann/protos/CMakeLists.linux-x86_64.txt b/kernel/indexann/protos/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..2a0f142e64
--- /dev/null
+++ b/kernel/indexann/protos/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,49 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Code-generation tools: the protoc compiler and its cpp_styleguide plugin.
+# One lookup per tool is sufficient.
+get_built_tool_path(
+  TOOL_protoc_bin
+  TOOL_protoc_dependency
+  contrib/tools/protoc/bin
+  protoc
+)
+get_built_tool_path(
+  TOOL_cpp_styleguide_bin
+  TOOL_cpp_styleguide_dependency
+  contrib/tools/protoc/plugins/cpp_styleguide
+  cpp_styleguide
+)
+
+add_library(kernel-indexann-protos)
+target_link_libraries(kernel-indexann-protos PUBLIC
+  contrib-libs-linux-headers
+  contrib-libs-cxxsupp
+  yutil
+  yt-interface-protos
+  contrib-libs-protobuf
+)
+# .proto files registered for this target through the project's target_proto_messages helper.
+target_proto_messages(kernel-indexann-protos PRIVATE
+  ${CMAKE_SOURCE_DIR}/kernel/indexann/protos/data.proto
+  ${CMAKE_SOURCE_DIR}/kernel/indexann/protos/portion.proto
+)
+# Include roots passed to target_proto_addincls; each distinct entry is listed once.
+target_proto_addincls(kernel-indexann-protos
+  ./
+  ${CMAKE_SOURCE_DIR}/
+  ${CMAKE_BINARY_DIR}
+  ${CMAKE_SOURCE_DIR}
+  ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+# Both generators (--cpp_out, --cpp_styleguide_out) write into the binary dir.
+target_proto_outs(kernel-indexann-protos
+  --cpp_out=${CMAKE_BINARY_DIR}/
+  --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/kernel/indexann/protos/CMakeLists.txt b/kernel/indexann/protos/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/kernel/indexann/protos/CMakeLists.txt
@@ -0,0 +1,22 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Include the build description matching the target platform. Only one branch is
+# expected to match; when none does, nothing is included from this directory.
+if (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+  include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+  include(CMakeLists.darwin-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND NOT HAVE_CUDA)
+  # Linux builds are further split by processor architecture.
+  if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
+    include(CMakeLists.linux-aarch64.txt)
+  elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+    include(CMakeLists.linux-x86_64.txt)
+  endif()
+endif()
diff --git a/kernel/indexann/protos/CMakeLists.windows-x86_64.txt b/kernel/indexann/protos/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..b9a657cc4a
--- /dev/null
+++ b/kernel/indexann/protos/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,48 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Code-generation tools: the protoc compiler and its cpp_styleguide plugin.
+# One lookup per tool is sufficient.
+get_built_tool_path(
+  TOOL_protoc_bin
+  TOOL_protoc_dependency
+  contrib/tools/protoc/bin
+  protoc
+)
+get_built_tool_path(
+  TOOL_cpp_styleguide_bin
+  TOOL_cpp_styleguide_dependency
+  contrib/tools/protoc/plugins/cpp_styleguide
+  cpp_styleguide
+)
+
+add_library(kernel-indexann-protos)
+target_link_libraries(kernel-indexann-protos PUBLIC
+  contrib-libs-cxxsupp
+  yutil
+  yt-interface-protos
+  contrib-libs-protobuf
+)
+# .proto files registered for this target through the project's target_proto_messages helper.
+target_proto_messages(kernel-indexann-protos PRIVATE
+  ${CMAKE_SOURCE_DIR}/kernel/indexann/protos/data.proto
+  ${CMAKE_SOURCE_DIR}/kernel/indexann/protos/portion.proto
+)
+# Include roots passed to target_proto_addincls; each distinct entry is listed once.
+target_proto_addincls(kernel-indexann-protos
+  ./
+  ${CMAKE_SOURCE_DIR}/
+  ${CMAKE_BINARY_DIR}
+  ${CMAKE_SOURCE_DIR}
+  ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+# Both generators (--cpp_out, --cpp_styleguide_out) write into the binary dir.
+target_proto_outs(kernel-indexann-protos
+  --cpp_out=${CMAKE_BINARY_DIR}/
+  --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/kernel/langregion/CMakeLists.darwin-x86_64.txt b/kernel/langregion/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..610427f4c4
--- /dev/null
+++ b/kernel/langregion/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# kernel-langregion: a single-source library (langregion.cpp).
+add_library(kernel-langregion)
+target_sources(kernel-langregion PRIVATE ${CMAKE_SOURCE_DIR}/kernel/langregion/langregion.cpp)
+# Link dependencies are PUBLIC, so they propagate to consumers of this target.
+target_link_libraries(kernel-langregion
+  PUBLIC
+    contrib-libs-cxxsupp
+    yutil
+    library-cpp-charset
+)
diff --git a/kernel/langregion/CMakeLists.linux-aarch64.txt b/kernel/langregion/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..513f55fda2
--- /dev/null
+++ b/kernel/langregion/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# kernel-langregion: a single-source library (langregion.cpp).
+add_library(kernel-langregion)
+target_sources(kernel-langregion PRIVATE ${CMAKE_SOURCE_DIR}/kernel/langregion/langregion.cpp)
+# Link dependencies are PUBLIC, so they propagate to consumers of this target.
+target_link_libraries(kernel-langregion
+  PUBLIC
+    contrib-libs-linux-headers
+    contrib-libs-cxxsupp
+    yutil
+    library-cpp-charset
+)
diff --git a/kernel/langregion/CMakeLists.linux-x86_64.txt b/kernel/langregion/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..513f55fda2
--- /dev/null
+++ b/kernel/langregion/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# kernel-langregion: a single-source library (langregion.cpp).
+add_library(kernel-langregion)
+target_sources(kernel-langregion PRIVATE ${CMAKE_SOURCE_DIR}/kernel/langregion/langregion.cpp)
+# Link dependencies are PUBLIC, so they propagate to consumers of this target.
+target_link_libraries(kernel-langregion
+  PUBLIC
+    contrib-libs-linux-headers
+    contrib-libs-cxxsupp
+    yutil
+    library-cpp-charset
+)
diff --git a/kernel/langregion/CMakeLists.txt b/kernel/langregion/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/kernel/langregion/CMakeLists.txt
@@ -0,0 +1,22 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Include the build description matching the target platform. Only one branch is
+# expected to match; when none does, nothing is included from this directory.
+if (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+  include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+  include(CMakeLists.darwin-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND NOT HAVE_CUDA)
+  # Linux builds are further split by processor architecture.
+  if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
+    include(CMakeLists.linux-aarch64.txt)
+  elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+    include(CMakeLists.linux-x86_64.txt)
+  endif()
+endif()
diff --git a/kernel/langregion/CMakeLists.windows-x86_64.txt b/kernel/langregion/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..610427f4c4
--- /dev/null
+++ b/kernel/langregion/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# kernel-langregion: a single-source library (langregion.cpp).
+add_library(kernel-langregion)
+target_sources(kernel-langregion PRIVATE ${CMAKE_SOURCE_DIR}/kernel/langregion/langregion.cpp)
+# Link dependencies are PUBLIC, so they propagate to consumers of this target.
+target_link_libraries(kernel-langregion
+  PUBLIC
+    contrib-libs-cxxsupp
+    yutil
+    library-cpp-charset
+)
diff --git a/kernel/mango/CMakeLists.txt b/kernel/mango/CMakeLists.txt
new file mode 100644
index 0000000000..499930c4b0
--- /dev/null
+++ b/kernel/mango/CMakeLists.txt
@@ -0,0 +1,9 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+# This directory defines no targets itself; it only descends into proto/.
+add_subdirectory(proto)
diff --git a/kernel/mango/proto/CMakeLists.darwin-x86_64.txt b/kernel/mango/proto/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..84532c4d8d
--- /dev/null
+++ b/kernel/mango/proto/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,59 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Code-generation tools: the protoc compiler and its cpp_styleguide plugin.
+# One lookup per tool is sufficient, regardless of how many .proto files follow.
+get_built_tool_path(
+  TOOL_protoc_bin
+  TOOL_protoc_dependency
+  contrib/tools/protoc/bin
+  protoc
+)
+get_built_tool_path(
+  TOOL_cpp_styleguide_bin
+  TOOL_cpp_styleguide_dependency
+  contrib/tools/protoc/plugins/cpp_styleguide
+  cpp_styleguide
+)
+
+add_library(kernel-mango-proto)
+target_link_libraries(kernel-mango-proto PUBLIC
+  contrib-libs-cxxsupp
+  yutil
+  kernel-blogs-protos
+  kernel-indexann-protos
+  cpp-langmask-proto
+  contrib-libs-protobuf
+)
+# .proto files registered for this target through the project's target_proto_messages helper.
+target_proto_messages(kernel-mango-proto PRIVATE
+  ${CMAKE_SOURCE_DIR}/kernel/mango/proto/author.proto
+  ${CMAKE_SOURCE_DIR}/kernel/mango/proto/authority.proto
+  ${CMAKE_SOURCE_DIR}/kernel/mango/proto/common.proto
+  ${CMAKE_SOURCE_DIR}/kernel/mango/proto/content.proto
+  ${CMAKE_SOURCE_DIR}/kernel/mango/proto/dl.proto
+  ${CMAKE_SOURCE_DIR}/kernel/mango/proto/fresh_feeds.proto
+  ${CMAKE_SOURCE_DIR}/kernel/mango/proto/ofeed.proto
+  ${CMAKE_SOURCE_DIR}/kernel/mango/proto/biased.proto
+  ${CMAKE_SOURCE_DIR}/kernel/mango/proto/quotes.proto
+  ${CMAKE_SOURCE_DIR}/kernel/mango/proto/trees.proto
+  ${CMAKE_SOURCE_DIR}/kernel/mango/proto/statistics.proto
+)
+# Include roots passed to target_proto_addincls; each distinct entry is listed once.
+target_proto_addincls(kernel-mango-proto
+  ./
+  ${CMAKE_SOURCE_DIR}/
+  ${CMAKE_BINARY_DIR}
+  ${CMAKE_SOURCE_DIR}
+  ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+# Both generators (--cpp_out, --cpp_styleguide_out) write into the binary dir.
+target_proto_outs(kernel-mango-proto
+  --cpp_out=${CMAKE_BINARY_DIR}/
+  --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/kernel/mango/proto/CMakeLists.linux-aarch64.txt b/kernel/mango/proto/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..4842433605
--- /dev/null
+++ b/kernel/mango/proto/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,60 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Code-generation tools: the protoc compiler and its cpp_styleguide plugin.
+# One lookup per tool is sufficient, regardless of how many .proto files follow.
+get_built_tool_path(
+  TOOL_protoc_bin
+  TOOL_protoc_dependency
+  contrib/tools/protoc/bin
+  protoc
+)
+get_built_tool_path(
+  TOOL_cpp_styleguide_bin
+  TOOL_cpp_styleguide_dependency
+  contrib/tools/protoc/plugins/cpp_styleguide
+  cpp_styleguide
+)
+
+add_library(kernel-mango-proto)
+target_link_libraries(kernel-mango-proto PUBLIC
+  contrib-libs-linux-headers
+  contrib-libs-cxxsupp
+  yutil
+  kernel-blogs-protos
+  kernel-indexann-protos
+  cpp-langmask-proto
+  contrib-libs-protobuf
+)
+# .proto files registered for this target through the project's target_proto_messages helper.
+target_proto_messages(kernel-mango-proto PRIVATE
+  ${CMAKE_SOURCE_DIR}/kernel/mango/proto/author.proto
+  ${CMAKE_SOURCE_DIR}/kernel/mango/proto/authority.proto
+  ${CMAKE_SOURCE_DIR}/kernel/mango/proto/common.proto
+  ${CMAKE_SOURCE_DIR}/kernel/mango/proto/content.proto
+  ${CMAKE_SOURCE_DIR}/kernel/mango/proto/dl.proto
+  ${CMAKE_SOURCE_DIR}/kernel/mango/proto/fresh_feeds.proto
+  ${CMAKE_SOURCE_DIR}/kernel/mango/proto/ofeed.proto
+  ${CMAKE_SOURCE_DIR}/kernel/mango/proto/biased.proto
+  ${CMAKE_SOURCE_DIR}/kernel/mango/proto/quotes.proto
+  ${CMAKE_SOURCE_DIR}/kernel/mango/proto/trees.proto
+  ${CMAKE_SOURCE_DIR}/kernel/mango/proto/statistics.proto
+)
+# Include roots passed to target_proto_addincls; each distinct entry is listed once.
+target_proto_addincls(kernel-mango-proto
+  ./
+  ${CMAKE_SOURCE_DIR}/
+  ${CMAKE_BINARY_DIR}
+  ${CMAKE_SOURCE_DIR}
+  ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+# Both generators (--cpp_out, --cpp_styleguide_out) write into the binary dir.
+target_proto_outs(kernel-mango-proto
+  --cpp_out=${CMAKE_BINARY_DIR}/
+  --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/kernel/mango/proto/CMakeLists.linux-x86_64.txt b/kernel/mango/proto/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..4842433605
--- /dev/null
+++ b/kernel/mango/proto/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,177 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(kernel-mango-proto)
+target_link_libraries(kernel-mango-proto PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ kernel-blogs-protos
+ kernel-indexann-protos
+ cpp-langmask-proto
+ contrib-libs-protobuf
+)
+target_proto_messages(kernel-mango-proto PRIVATE
+ ${CMAKE_SOURCE_DIR}/kernel/mango/proto/author.proto
+ ${CMAKE_SOURCE_DIR}/kernel/mango/proto/authority.proto
+ ${CMAKE_SOURCE_DIR}/kernel/mango/proto/common.proto
+ ${CMAKE_SOURCE_DIR}/kernel/mango/proto/content.proto
+ ${CMAKE_SOURCE_DIR}/kernel/mango/proto/dl.proto
+ ${CMAKE_SOURCE_DIR}/kernel/mango/proto/fresh_feeds.proto
+ ${CMAKE_SOURCE_DIR}/kernel/mango/proto/ofeed.proto
+ ${CMAKE_SOURCE_DIR}/kernel/mango/proto/biased.proto
+ ${CMAKE_SOURCE_DIR}/kernel/mango/proto/quotes.proto
+ ${CMAKE_SOURCE_DIR}/kernel/mango/proto/trees.proto
+ ${CMAKE_SOURCE_DIR}/kernel/mango/proto/statistics.proto
+)
+target_proto_addincls(kernel-mango-proto
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(kernel-mango-proto
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/kernel/mango/proto/CMakeLists.txt b/kernel/mango/proto/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/kernel/mango/proto/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/kernel/mango/proto/CMakeLists.windows-x86_64.txt b/kernel/mango/proto/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..84532c4d8d
--- /dev/null
+++ b/kernel/mango/proto/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,176 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(kernel-mango-proto)
+target_link_libraries(kernel-mango-proto PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ kernel-blogs-protos
+ kernel-indexann-protos
+ cpp-langmask-proto
+ contrib-libs-protobuf
+)
+target_proto_messages(kernel-mango-proto PRIVATE
+ ${CMAKE_SOURCE_DIR}/kernel/mango/proto/author.proto
+ ${CMAKE_SOURCE_DIR}/kernel/mango/proto/authority.proto
+ ${CMAKE_SOURCE_DIR}/kernel/mango/proto/common.proto
+ ${CMAKE_SOURCE_DIR}/kernel/mango/proto/content.proto
+ ${CMAKE_SOURCE_DIR}/kernel/mango/proto/dl.proto
+ ${CMAKE_SOURCE_DIR}/kernel/mango/proto/fresh_feeds.proto
+ ${CMAKE_SOURCE_DIR}/kernel/mango/proto/ofeed.proto
+ ${CMAKE_SOURCE_DIR}/kernel/mango/proto/biased.proto
+ ${CMAKE_SOURCE_DIR}/kernel/mango/proto/quotes.proto
+ ${CMAKE_SOURCE_DIR}/kernel/mango/proto/trees.proto
+ ${CMAKE_SOURCE_DIR}/kernel/mango/proto/statistics.proto
+)
+target_proto_addincls(kernel-mango-proto
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(kernel-mango-proto
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/kernel/multilanguage_hosts/CMakeLists.darwin-x86_64.txt b/kernel/multilanguage_hosts/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..ca8f3d8057
--- /dev/null
+++ b/kernel/multilanguage_hosts/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(kernel-multilanguage_hosts)
+target_link_libraries(kernel-multilanguage_hosts PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ kernel-langregion
+ cpp-string_utils-url
+)
+target_sources(kernel-multilanguage_hosts PRIVATE
+ ${CMAKE_SOURCE_DIR}/kernel/multilanguage_hosts/multilanguage_hosts.cpp
+ ${CMAKE_SOURCE_DIR}/kernel/multilanguage_hosts/multilang_prefixes.cpp
+)
diff --git a/kernel/multilanguage_hosts/CMakeLists.linux-aarch64.txt b/kernel/multilanguage_hosts/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..b64c151d91
--- /dev/null
+++ b/kernel/multilanguage_hosts/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,21 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(kernel-multilanguage_hosts)
+target_link_libraries(kernel-multilanguage_hosts PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ kernel-langregion
+ cpp-string_utils-url
+)
+target_sources(kernel-multilanguage_hosts PRIVATE
+ ${CMAKE_SOURCE_DIR}/kernel/multilanguage_hosts/multilanguage_hosts.cpp
+ ${CMAKE_SOURCE_DIR}/kernel/multilanguage_hosts/multilang_prefixes.cpp
+)
diff --git a/kernel/multilanguage_hosts/CMakeLists.linux-x86_64.txt b/kernel/multilanguage_hosts/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..b64c151d91
--- /dev/null
+++ b/kernel/multilanguage_hosts/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,21 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(kernel-multilanguage_hosts)
+target_link_libraries(kernel-multilanguage_hosts PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ kernel-langregion
+ cpp-string_utils-url
+)
+target_sources(kernel-multilanguage_hosts PRIVATE
+ ${CMAKE_SOURCE_DIR}/kernel/multilanguage_hosts/multilanguage_hosts.cpp
+ ${CMAKE_SOURCE_DIR}/kernel/multilanguage_hosts/multilang_prefixes.cpp
+)
diff --git a/kernel/multilanguage_hosts/CMakeLists.txt b/kernel/multilanguage_hosts/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/kernel/multilanguage_hosts/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/kernel/multilanguage_hosts/CMakeLists.windows-x86_64.txt b/kernel/multilanguage_hosts/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..ca8f3d8057
--- /dev/null
+++ b/kernel/multilanguage_hosts/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(kernel-multilanguage_hosts)
+target_link_libraries(kernel-multilanguage_hosts PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ kernel-langregion
+ cpp-string_utils-url
+)
+target_sources(kernel-multilanguage_hosts PRIVATE
+ ${CMAKE_SOURCE_DIR}/kernel/multilanguage_hosts/multilanguage_hosts.cpp
+ ${CMAKE_SOURCE_DIR}/kernel/multilanguage_hosts/multilang_prefixes.cpp
+)
diff --git a/kernel/search_zone/CMakeLists.txt b/kernel/search_zone/CMakeLists.txt
new file mode 100644
index 0000000000..6d580ae9ad
--- /dev/null
+++ b/kernel/search_zone/CMakeLists.txt
@@ -0,0 +1,9 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(protos)
diff --git a/kernel/search_zone/protos/CMakeLists.darwin-x86_64.txt b/kernel/search_zone/protos/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..632f507298
--- /dev/null
+++ b/kernel/search_zone/protos/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,45 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(kernel-search_zone-protos)
+target_link_libraries(kernel-search_zone-protos PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ yt-interface-protos
+ yt_proto-yt-formats
+ contrib-libs-protobuf
+)
+target_proto_messages(kernel-search_zone-protos PRIVATE
+ ${CMAKE_SOURCE_DIR}/kernel/search_zone/protos/searchzone.proto
+)
+target_proto_addincls(kernel-search_zone-protos
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(kernel-search_zone-protos
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/kernel/search_zone/protos/CMakeLists.linux-aarch64.txt b/kernel/search_zone/protos/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..25049b81ee
--- /dev/null
+++ b/kernel/search_zone/protos/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,46 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(kernel-search_zone-protos)
+target_link_libraries(kernel-search_zone-protos PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yt-interface-protos
+ yt_proto-yt-formats
+ contrib-libs-protobuf
+)
+target_proto_messages(kernel-search_zone-protos PRIVATE
+ ${CMAKE_SOURCE_DIR}/kernel/search_zone/protos/searchzone.proto
+)
+target_proto_addincls(kernel-search_zone-protos
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(kernel-search_zone-protos
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/kernel/search_zone/protos/CMakeLists.linux-x86_64.txt b/kernel/search_zone/protos/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..25049b81ee
--- /dev/null
+++ b/kernel/search_zone/protos/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,46 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(kernel-search_zone-protos)
+target_link_libraries(kernel-search_zone-protos PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yt-interface-protos
+ yt_proto-yt-formats
+ contrib-libs-protobuf
+)
+target_proto_messages(kernel-search_zone-protos PRIVATE
+ ${CMAKE_SOURCE_DIR}/kernel/search_zone/protos/searchzone.proto
+)
+target_proto_addincls(kernel-search_zone-protos
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(kernel-search_zone-protos
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/kernel/search_zone/protos/CMakeLists.txt b/kernel/search_zone/protos/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/kernel/search_zone/protos/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/kernel/search_zone/protos/CMakeLists.windows-x86_64.txt b/kernel/search_zone/protos/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..632f507298
--- /dev/null
+++ b/kernel/search_zone/protos/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,45 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(kernel-search_zone-protos)
+target_link_libraries(kernel-search_zone-protos PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ yt-interface-protos
+ yt_proto-yt-formats
+ contrib-libs-protobuf
+)
+target_proto_messages(kernel-search_zone-protos PRIVATE
+ ${CMAKE_SOURCE_DIR}/kernel/search_zone/protos/searchzone.proto
+)
+target_proto_addincls(kernel-search_zone-protos
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(kernel-search_zone-protos
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/kernel/urlnorm/CMakeLists.darwin-x86_64.txt b/kernel/urlnorm/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..e3a91ac4ad
--- /dev/null
+++ b/kernel/urlnorm/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,39 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_ragel5_bin
+ TOOL_ragel5_dependency
+ contrib/tools/ragel5/ragel
+ ragel5
+)
+get_built_tool_path(
+ TOOL_rlgen-cd_bin
+ TOOL_rlgen-cd_dependency
+ contrib/tools/ragel5/rlgen-cd
+ rlgen-cd
+)
+
+add_library(kernel-urlnorm)
+target_link_libraries(kernel-urlnorm PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cgiparam
+ cpp-digest-md5
+ cpp-string_utils-base64
+ cpp-string_utils-quote
+ cpp-string_utils-url
+ library-cpp-uri
+)
+target_sources(kernel-urlnorm PRIVATE
+ ${CMAKE_SOURCE_DIR}/kernel/urlnorm/host.cpp
+ ${CMAKE_SOURCE_DIR}/kernel/urlnorm/normalize.cpp
+ ${CMAKE_SOURCE_DIR}/kernel/urlnorm/urlnorm.cpp
+ ${CMAKE_SOURCE_DIR}/kernel/urlnorm/validate.cpp
+ ${CMAKE_BINARY_DIR}/kernel/urlnorm/urlhashval.rl5.cpp
+)
diff --git a/kernel/urlnorm/CMakeLists.linux-aarch64.txt b/kernel/urlnorm/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..d365d758ea
--- /dev/null
+++ b/kernel/urlnorm/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,40 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_ragel5_bin
+ TOOL_ragel5_dependency
+ contrib/tools/ragel5/ragel
+ ragel5
+)
+get_built_tool_path(
+ TOOL_rlgen-cd_bin
+ TOOL_rlgen-cd_dependency
+ contrib/tools/ragel5/rlgen-cd
+ rlgen-cd
+)
+
+add_library(kernel-urlnorm)
+target_link_libraries(kernel-urlnorm PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cgiparam
+ cpp-digest-md5
+ cpp-string_utils-base64
+ cpp-string_utils-quote
+ cpp-string_utils-url
+ library-cpp-uri
+)
+target_sources(kernel-urlnorm PRIVATE
+ ${CMAKE_SOURCE_DIR}/kernel/urlnorm/host.cpp
+ ${CMAKE_SOURCE_DIR}/kernel/urlnorm/normalize.cpp
+ ${CMAKE_SOURCE_DIR}/kernel/urlnorm/urlnorm.cpp
+ ${CMAKE_SOURCE_DIR}/kernel/urlnorm/validate.cpp
+ ${CMAKE_BINARY_DIR}/kernel/urlnorm/urlhashval.rl5.cpp
+)
diff --git a/kernel/urlnorm/CMakeLists.linux-x86_64.txt b/kernel/urlnorm/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..d365d758ea
--- /dev/null
+++ b/kernel/urlnorm/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,40 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_ragel5_bin
+ TOOL_ragel5_dependency
+ contrib/tools/ragel5/ragel
+ ragel5
+)
+get_built_tool_path(
+ TOOL_rlgen-cd_bin
+ TOOL_rlgen-cd_dependency
+ contrib/tools/ragel5/rlgen-cd
+ rlgen-cd
+)
+
+add_library(kernel-urlnorm)
+target_link_libraries(kernel-urlnorm PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cgiparam
+ cpp-digest-md5
+ cpp-string_utils-base64
+ cpp-string_utils-quote
+ cpp-string_utils-url
+ library-cpp-uri
+)
+target_sources(kernel-urlnorm PRIVATE
+ ${CMAKE_SOURCE_DIR}/kernel/urlnorm/host.cpp
+ ${CMAKE_SOURCE_DIR}/kernel/urlnorm/normalize.cpp
+ ${CMAKE_SOURCE_DIR}/kernel/urlnorm/urlnorm.cpp
+ ${CMAKE_SOURCE_DIR}/kernel/urlnorm/validate.cpp
+ ${CMAKE_BINARY_DIR}/kernel/urlnorm/urlhashval.rl5.cpp
+)
diff --git a/kernel/urlnorm/CMakeLists.txt b/kernel/urlnorm/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/kernel/urlnorm/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/kernel/urlnorm/CMakeLists.windows-x86_64.txt b/kernel/urlnorm/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..e3a91ac4ad
--- /dev/null
+++ b/kernel/urlnorm/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,39 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_ragel5_bin
+ TOOL_ragel5_dependency
+ contrib/tools/ragel5/ragel
+ ragel5
+)
+get_built_tool_path(
+ TOOL_rlgen-cd_bin
+ TOOL_rlgen-cd_dependency
+ contrib/tools/ragel5/rlgen-cd
+ rlgen-cd
+)
+
+add_library(kernel-urlnorm)
+target_link_libraries(kernel-urlnorm PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cgiparam
+ cpp-digest-md5
+ cpp-string_utils-base64
+ cpp-string_utils-quote
+ cpp-string_utils-url
+ library-cpp-uri
+)
+target_sources(kernel-urlnorm PRIVATE
+ ${CMAKE_SOURCE_DIR}/kernel/urlnorm/host.cpp
+ ${CMAKE_SOURCE_DIR}/kernel/urlnorm/normalize.cpp
+ ${CMAKE_SOURCE_DIR}/kernel/urlnorm/urlnorm.cpp
+ ${CMAKE_SOURCE_DIR}/kernel/urlnorm/validate.cpp
+ ${CMAKE_BINARY_DIR}/kernel/urlnorm/urlhashval.rl5.cpp
+)
diff --git a/library/cpp/CMakeLists.darwin-x86_64.txt b/library/cpp/CMakeLists.darwin-x86_64.txt
index 772027a342..5497fd21be 100644
--- a/library/cpp/CMakeLists.darwin-x86_64.txt
+++ b/library/cpp/CMakeLists.darwin-x86_64.txt
@@ -36,6 +36,9 @@ add_subdirectory(disjoint_sets)
add_subdirectory(dns)
add_subdirectory(enumbitset)
add_subdirectory(execprofile)
+add_subdirectory(geo)
+add_subdirectory(geobase)
+add_subdirectory(geohash)
add_subdirectory(getopt)
add_subdirectory(grpc)
add_subdirectory(histogram)
@@ -44,9 +47,11 @@ add_subdirectory(http)
add_subdirectory(hyperloglog)
add_subdirectory(int128)
add_subdirectory(ipmath)
+add_subdirectory(ipreg)
add_subdirectory(ipv6_address)
add_subdirectory(iterator)
add_subdirectory(json)
+add_subdirectory(langmask)
add_subdirectory(lcs)
add_subdirectory(lfalloc)
add_subdirectory(linear_regression)
@@ -55,6 +60,7 @@ add_subdirectory(lua)
add_subdirectory(lwtrace)
add_subdirectory(malloc)
add_subdirectory(messagebus)
+add_subdirectory(microbdb)
add_subdirectory(mime)
add_subdirectory(monlib)
add_subdirectory(on_disk)
@@ -68,6 +74,8 @@ add_subdirectory(random_provider)
add_subdirectory(regex)
add_subdirectory(resource)
add_subdirectory(retry)
+add_subdirectory(reverse_geocoder)
+add_subdirectory(robots_txt)
add_subdirectory(sanitizer)
add_subdirectory(scheme)
add_subdirectory(sighandler)
@@ -90,6 +98,7 @@ add_subdirectory(unified_agent_client)
add_subdirectory(uri)
add_subdirectory(xml)
add_subdirectory(yaml)
+add_subdirectory(yconf)
add_subdirectory(yson)
add_subdirectory(yson_pull)
add_subdirectory(yt)
diff --git a/library/cpp/CMakeLists.linux-aarch64.txt b/library/cpp/CMakeLists.linux-aarch64.txt
index cd50b0e3a4..5e93629802 100644
--- a/library/cpp/CMakeLists.linux-aarch64.txt
+++ b/library/cpp/CMakeLists.linux-aarch64.txt
@@ -35,6 +35,9 @@ add_subdirectory(disjoint_sets)
add_subdirectory(dns)
add_subdirectory(enumbitset)
add_subdirectory(execprofile)
+add_subdirectory(geo)
+add_subdirectory(geobase)
+add_subdirectory(geohash)
add_subdirectory(getopt)
add_subdirectory(grpc)
add_subdirectory(histogram)
@@ -43,9 +46,11 @@ add_subdirectory(http)
add_subdirectory(hyperloglog)
add_subdirectory(int128)
add_subdirectory(ipmath)
+add_subdirectory(ipreg)
add_subdirectory(ipv6_address)
add_subdirectory(iterator)
add_subdirectory(json)
+add_subdirectory(langmask)
add_subdirectory(lcs)
add_subdirectory(lfalloc)
add_subdirectory(linear_regression)
@@ -54,6 +59,7 @@ add_subdirectory(lua)
add_subdirectory(lwtrace)
add_subdirectory(malloc)
add_subdirectory(messagebus)
+add_subdirectory(microbdb)
add_subdirectory(mime)
add_subdirectory(monlib)
add_subdirectory(on_disk)
@@ -67,6 +73,8 @@ add_subdirectory(random_provider)
add_subdirectory(regex)
add_subdirectory(resource)
add_subdirectory(retry)
+add_subdirectory(reverse_geocoder)
+add_subdirectory(robots_txt)
add_subdirectory(sanitizer)
add_subdirectory(scheme)
add_subdirectory(sighandler)
@@ -89,6 +97,7 @@ add_subdirectory(unified_agent_client)
add_subdirectory(uri)
add_subdirectory(xml)
add_subdirectory(yaml)
+add_subdirectory(yconf)
add_subdirectory(yson)
add_subdirectory(yson_pull)
add_subdirectory(yt)
diff --git a/library/cpp/CMakeLists.linux-x86_64.txt b/library/cpp/CMakeLists.linux-x86_64.txt
index 772027a342..5497fd21be 100644
--- a/library/cpp/CMakeLists.linux-x86_64.txt
+++ b/library/cpp/CMakeLists.linux-x86_64.txt
@@ -36,6 +36,9 @@ add_subdirectory(disjoint_sets)
add_subdirectory(dns)
add_subdirectory(enumbitset)
add_subdirectory(execprofile)
+add_subdirectory(geo)
+add_subdirectory(geobase)
+add_subdirectory(geohash)
add_subdirectory(getopt)
add_subdirectory(grpc)
add_subdirectory(histogram)
@@ -44,9 +47,11 @@ add_subdirectory(http)
add_subdirectory(hyperloglog)
add_subdirectory(int128)
add_subdirectory(ipmath)
+add_subdirectory(ipreg)
add_subdirectory(ipv6_address)
add_subdirectory(iterator)
add_subdirectory(json)
+add_subdirectory(langmask)
add_subdirectory(lcs)
add_subdirectory(lfalloc)
add_subdirectory(linear_regression)
@@ -55,6 +60,7 @@ add_subdirectory(lua)
add_subdirectory(lwtrace)
add_subdirectory(malloc)
add_subdirectory(messagebus)
+add_subdirectory(microbdb)
add_subdirectory(mime)
add_subdirectory(monlib)
add_subdirectory(on_disk)
@@ -68,6 +74,8 @@ add_subdirectory(random_provider)
add_subdirectory(regex)
add_subdirectory(resource)
add_subdirectory(retry)
+add_subdirectory(reverse_geocoder)
+add_subdirectory(robots_txt)
add_subdirectory(sanitizer)
add_subdirectory(scheme)
add_subdirectory(sighandler)
@@ -90,6 +98,7 @@ add_subdirectory(unified_agent_client)
add_subdirectory(uri)
add_subdirectory(xml)
add_subdirectory(yaml)
+add_subdirectory(yconf)
add_subdirectory(yson)
add_subdirectory(yson_pull)
add_subdirectory(yt)
diff --git a/library/cpp/CMakeLists.windows-x86_64.txt b/library/cpp/CMakeLists.windows-x86_64.txt
index 772027a342..5497fd21be 100644
--- a/library/cpp/CMakeLists.windows-x86_64.txt
+++ b/library/cpp/CMakeLists.windows-x86_64.txt
@@ -36,6 +36,9 @@ add_subdirectory(disjoint_sets)
add_subdirectory(dns)
add_subdirectory(enumbitset)
add_subdirectory(execprofile)
+add_subdirectory(geo)
+add_subdirectory(geobase)
+add_subdirectory(geohash)
add_subdirectory(getopt)
add_subdirectory(grpc)
add_subdirectory(histogram)
@@ -44,9 +47,11 @@ add_subdirectory(http)
add_subdirectory(hyperloglog)
add_subdirectory(int128)
add_subdirectory(ipmath)
+add_subdirectory(ipreg)
add_subdirectory(ipv6_address)
add_subdirectory(iterator)
add_subdirectory(json)
+add_subdirectory(langmask)
add_subdirectory(lcs)
add_subdirectory(lfalloc)
add_subdirectory(linear_regression)
@@ -55,6 +60,7 @@ add_subdirectory(lua)
add_subdirectory(lwtrace)
add_subdirectory(malloc)
add_subdirectory(messagebus)
+add_subdirectory(microbdb)
add_subdirectory(mime)
add_subdirectory(monlib)
add_subdirectory(on_disk)
@@ -68,6 +74,8 @@ add_subdirectory(random_provider)
add_subdirectory(regex)
add_subdirectory(resource)
add_subdirectory(retry)
+add_subdirectory(reverse_geocoder)
+add_subdirectory(robots_txt)
add_subdirectory(sanitizer)
add_subdirectory(scheme)
add_subdirectory(sighandler)
@@ -90,6 +98,7 @@ add_subdirectory(unified_agent_client)
add_subdirectory(uri)
add_subdirectory(xml)
add_subdirectory(yaml)
+add_subdirectory(yconf)
add_subdirectory(yson)
add_subdirectory(yson_pull)
add_subdirectory(yt)
diff --git a/library/cpp/containers/CMakeLists.txt b/library/cpp/containers/CMakeLists.txt
index 43fcbe8346..40f5013867 100644
--- a/library/cpp/containers/CMakeLists.txt
+++ b/library/cpp/containers/CMakeLists.txt
@@ -20,5 +20,6 @@ add_subdirectory(ring_buffer)
add_subdirectory(sorted_vector)
add_subdirectory(stack_array)
add_subdirectory(stack_vector)
+add_subdirectory(str_hash)
add_subdirectory(str_map)
add_subdirectory(top_keeper)
diff --git a/library/cpp/containers/str_hash/CMakeLists.darwin-x86_64.txt b/library/cpp/containers/str_hash/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..627814f0ed
--- /dev/null
+++ b/library/cpp/containers/str_hash/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-containers-str_hash)  # no STATIC/SHARED keyword, so the library type follows BUILD_SHARED_LIBS
+target_link_libraries(cpp-containers-str_hash PUBLIC  # PUBLIC: targets linking this library inherit these dependencies
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-charset  # corresponds to PEERDIR library/cpp/charset in ya.make
+ cpp-containers-str_map  # corresponds to PEERDIR library/cpp/containers/str_map in ya.make
+)
+target_sources(cpp-containers-str_hash PRIVATE  # the only translation unit of this library
+ ${CMAKE_SOURCE_DIR}/library/cpp/containers/str_hash/str_hash.cpp
+)
diff --git a/library/cpp/containers/str_hash/CMakeLists.linux-aarch64.txt b/library/cpp/containers/str_hash/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..cd723cbea2
--- /dev/null
+++ b/library/cpp/containers/str_hash/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-containers-str_hash)  # no STATIC/SHARED keyword, so the library type follows BUILD_SHARED_LIBS
+target_link_libraries(cpp-containers-str_hash PUBLIC  # PUBLIC: targets linking this library inherit these dependencies
+ contrib-libs-linux-headers  # present only in the Linux listings of this library
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-charset  # corresponds to PEERDIR library/cpp/charset in ya.make
+ cpp-containers-str_map  # corresponds to PEERDIR library/cpp/containers/str_map in ya.make
+)
+target_sources(cpp-containers-str_hash PRIVATE  # the only translation unit of this library
+ ${CMAKE_SOURCE_DIR}/library/cpp/containers/str_hash/str_hash.cpp
+)
diff --git a/library/cpp/containers/str_hash/CMakeLists.linux-x86_64.txt b/library/cpp/containers/str_hash/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..cd723cbea2
--- /dev/null
+++ b/library/cpp/containers/str_hash/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-containers-str_hash)  # no STATIC/SHARED keyword, so the library type follows BUILD_SHARED_LIBS
+target_link_libraries(cpp-containers-str_hash PUBLIC  # PUBLIC: targets linking this library inherit these dependencies
+ contrib-libs-linux-headers  # present only in the Linux listings of this library
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-charset  # corresponds to PEERDIR library/cpp/charset in ya.make
+ cpp-containers-str_map  # corresponds to PEERDIR library/cpp/containers/str_map in ya.make
+)
+target_sources(cpp-containers-str_hash PRIVATE  # the only translation unit of this library
+ ${CMAKE_SOURCE_DIR}/library/cpp/containers/str_hash/str_hash.cpp
+)
diff --git a/library/cpp/containers/str_hash/CMakeLists.txt b/library/cpp/containers/str_hash/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/library/cpp/containers/str_hash/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)  # include the listing generated for the platform being built
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")  # the only branch that does not test HAVE_CUDA
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()  # no else(): when no branch matches, nothing is included and the library target is not defined
diff --git a/library/cpp/containers/str_hash/CMakeLists.windows-x86_64.txt b/library/cpp/containers/str_hash/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..627814f0ed
--- /dev/null
+++ b/library/cpp/containers/str_hash/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-containers-str_hash)  # no STATIC/SHARED keyword, so the library type follows BUILD_SHARED_LIBS
+target_link_libraries(cpp-containers-str_hash PUBLIC  # PUBLIC: targets linking this library inherit these dependencies
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-charset  # corresponds to PEERDIR library/cpp/charset in ya.make
+ cpp-containers-str_map  # corresponds to PEERDIR library/cpp/containers/str_map in ya.make
+)
+target_sources(cpp-containers-str_hash PRIVATE  # the only translation unit of this library
+ ${CMAKE_SOURCE_DIR}/library/cpp/containers/str_hash/str_hash.cpp
+)
diff --git a/library/cpp/containers/str_hash/str_hash.cpp b/library/cpp/containers/str_hash/str_hash.cpp
new file mode 100644
index 0000000000..1298638533
--- /dev/null
+++ b/library/cpp/containers/str_hash/str_hash.cpp
@@ -0,0 +1,60 @@
+#include "str_hash.h"
+
+#include <library/cpp/charset/ci_string.h>
+#include <util/stream/output.h>
+#include <util/stream/input.h>
+
+HashSet::HashSet(const char** array, size_type size) { // fills the set from a sentinel-terminated list of C strings
+    Resize(size); // reserve room for the expected number of keys up front
+    for (const char** cur = array; *cur && **cur; ++cur) // a NULL pointer or an empty string ends the list
+        AddPermanent(*cur);
+}
+
+void HashSet::Read(IInputStream* input) {
+    // Consume the stream one line at a time; each line is converted to a
+    // TCiString and handed to AddUniq() as a key.
+    for (TString line; input->ReadLine(line);) {
+        AddUniq(TCiString(line).c_str());
+    }
+}
+
+void HashSet::Write(IOutputStream* output) const {
+    // Emit every stored key followed by "\n", in the table's iteration order.
+    for (const_iterator entry = begin(); entry != end(); ++entry)
+        *output << (*entry).first << "\n";
+}
+
+#ifdef TEST_STRHASH // manual test driver; compiled only when TEST_STRHASH is defined
+#include <ctime>
+#include <fstream>
+#include <cstdio>
+#include <cstdlib>
+// NOTE(review): cin/cout are used below but <iostream> is not included here -- confirm it arrives transitively.
+using namespace std;
+// Loads a word list from stdin, then echoes every word of each file named on the command line; words found in the list are wrapped in [[ ]].
+int main(int argc, char* argv[]) {
+ if (argc < 2) {
+ printf("usage: stoplist <stop-words file ...\n");
+ exit(EXIT_FAILURE); // FreeBSD: EX_USAGE
+ }
+ Hash hash; // NOTE(review): Hash is declared as a class template with no default for T, so this likely does not compile as written -- confirm
+ hash.Read(cin); // NOTE(review): the only Read() defined in this file is HashSet::Read(IInputStream*) -- confirm what was intended
+ for (--argc, ++argv; argc > 0; --argc, ++argv) { // skip the program name; argv[0] is then the current file name
+ ifstream input(argv[0]);
+ if (!input.good()) {
+ perror(argv[0]); // report the unreadable file and carry on with the next one
+ continue;
+ }
+ TCiString s;
+ while (input >> s) {
+ if (!hash.Has(s))
+ cout << s << "\n";
+ else
+ cout << "[[" << s << "]]" // word is present in the list read from stdin
+ << "\n";
+ }
+ }
+ return EXIT_SUCCESS; // EX_OK
+}
+// End of the TEST_STRHASH-only driver.
+#endif
diff --git a/library/cpp/containers/str_hash/str_hash.h b/library/cpp/containers/str_hash/str_hash.h
new file mode 100644
index 0000000000..25f960dbb5
--- /dev/null
+++ b/library/cpp/containers/str_hash/str_hash.h
@@ -0,0 +1,181 @@
+#pragma once
+
+#include <library/cpp/containers/str_map/str_map.h>
+#include <library/cpp/charset/ci_string.h>
+#include <util/system/yassert.h>
+#include <util/memory/tempbuf.h>
+
+#include <memory>
+
+class IInputStream;
+class IOutputStream;
+
+template <class T, class Alloc = std::allocator<const char*>>
+class Hash; // forward declaration; it carries the default for Alloc, the class body is defined further down in this header
+
+struct yvoid { // empty placeholder value type; HashSet derives from Hash<yvoid> and so stores keys only
+ yvoid() = default;
+};
+
+template <typename T, class Alloc>
+class Hash: public string_hash<T, ci_hash, ci_equal_to, Alloc> { // C-string -> T map built on string_hash with ci_hash / ci_equal_to
+ using ci_string_hash = string_hash<T, ci_hash, ci_equal_to, Alloc>;
+ // NOTE(review): the ci_ prefix presumably means keys are hashed/compared case-insensitively -- confirm in str_map.h.
+protected:
+ using ci_string_hash::pool; // key storage of the base class, re-exported for subclasses and used by Pool()
+ // Public surface: typedefs and base-class members re-exported for callers.
+public:
+ using size_type = typename ci_string_hash::size_type;
+ using const_iterator = typename ci_string_hash::const_iterator;
+ using iterator = typename ci_string_hash::iterator;
+ using value_type = typename ci_string_hash::value_type;
+ using ci_string_hash::begin;
+ using ci_string_hash::end;
+ using ci_string_hash::find;
+ using ci_string_hash::size;
+ // Construction: empty, pre-sized, or filled from parallel key/value arrays.
+ Hash()
+ : ci_string_hash()
+ {
+ }
+ explicit Hash(size_type theSize) // forwards theSize and theSize * AVERAGEWORD_BUF to the base constructor
+ : ci_string_hash(theSize, theSize * AVERAGEWORD_BUF)
+ {
+ }
+ Hash(const char** strings, size_type size = 0, T* = 0); // strings must end with NULL or ""; the values pointer is asserted non-null in the definition despite its default of 0
+ virtual ~Hash();
+ bool Has(const char* s, size_t len, T* pp = nullptr) const; // key given as pointer + length; it is copied and NUL-terminated before the lookup
+ bool Has(const char* s, T* pp = nullptr) const { // true if s is present; when pp is non-null the mapped value is copied to *pp
+ const_iterator it;
+ if ((it = find(s)) == end())
+ return false;
+ else if (pp)
+ *pp = (*it).second;
+ return true;
+ }
+ void Add(const char* s, T data) {
+ // Inserts s mapped to data through insert_copy(). This is the same
+ // unique insert that AddUniq() performs: per the original note the
+ // underlying 'hash_map' has no cheaper, unchecked insert().
+ // Original note, continued: a 'hash_mmap' would be needed to get that
+ // kind of effect, and even then there would still be checks inside,
+ // to keep equal keys next to each other (see insert_equal()).
+ this->insert_copy(s, data);
+ }
+ bool AddUniq(const char* s, T data) { // returns the bool half of insert_copy()'s result (presumably true when s was newly inserted)
+ return this->insert_copy(s, data).second;
+ }
+ // Inserts via insert() rather than insert_copy(), to avoid allocations (e.g. in constructors) -- presumably s must then outlive the table.
+ void AddPermanent(const char* s, T data) {
+ this->insert(value_type(s, data));
+ }
+ T Detach(const char* s) { // erases s and returns its value; returns T() when s is absent
+ iterator it = find(s);
+ if (it == end())
+ return T();
+ T data = (*it).second;
+ this->erase(it);
+ return data;
+ }
+ size_type NumEntries() const { // same value as size()
+ return size();
+ }
+ bool ForEach(bool (*func)(const char* key, T data, void* cookie), void* cookie = nullptr); // calls func per entry until it returns false; the result is false when stopped early
+ void Resize(size_type theSize) { // only reserve()s table capacity
+ this->reserve(theSize);
+ // no pool resizing here.
+ }
+ virtual void Clear();
+ char* Pool() { // NOTE(review): appends one '\0' unless the second-to-last pool byte is already '\0', then returns the pool start -- presumably guarantees a double-NUL end marker; confirm
+ if (pool.Size() < 2 || pool.End()[-2] != '\0')
+ pool.Append("\0", 1);
+ return pool.Begin();
+ }
+};
+
+template <class T, class Alloc>
+Hash<T, Alloc>::Hash(const char** array, size_type theSize, T* data) {
+    // `array` ends with a NULL pointer or an empty string; `data` runs parallel to it and must be non-null.
+    Y_ASSERT(data != nullptr);
+    Resize(theSize);
+    for (; *array && **array; ++array, ++data)
+        AddPermanent(*array, *data);
+}
+
+template <class T, class Alloc>
+bool Hash<T, Alloc>::Has(const char* s, size_t len, T* pp) const {
+    // Copy the first len bytes of s into a scratch buffer, NUL-terminate it, then delegate to Has(const char*, T*).
+    TTempArray<char> scratch(len + 1);
+    memcpy(scratch.Data(), s, len);
+    scratch.Data()[len] = '\0';
+    return Has(scratch.Data(), pp);
+}
+
+template <class T, class Alloc>
+Hash<T, Alloc>::~Hash() { // drops all entries on destruction
+    Hash::Clear(); // spelled out: inside a destructor the virtual call binds to Hash::Clear, never to a derived override
+}
+
+template <class T, class Alloc>
+void Hash<T, Alloc>::Clear() { // removes every entry; declared virtual in the class
+ ci_string_hash::clear_hash(); // base-class reset; per the original note this also makes the key pool empty
+}
+
+template <class T, class Alloc>
+bool Hash<T, Alloc>::ForEach(bool (*func)(const char* key, T data, void* cookie), void* cookie) {
+    // Visit entries in iteration order; the first callback returning false ends the walk and makes the result false.
+    const_iterator cur = begin();
+    while (cur != end() && func((*cur).first, (*cur).second, cookie))
+        ++cur;
+    return cur == end();
+}
+
+// Set of C-string keys: a Hash whose mapped value is the empty placeholder yvoid.
+class HashSet: public Hash<yvoid> {
+public:
+    // Fills the set from `array` (ended by a NULL pointer or an empty string); `size` is passed to Resize(). Defined in str_hash.cpp.
+    HashSet(const char** array, size_type size = 0);
+    HashSet()
+        : Hash<yvoid>()
+    {
+    }
+    // Adds every line of `input` as a key (defined in str_hash.cpp).
+    void Read(IInputStream* input);
+    // Writes each key followed by "\n" to `output` (defined in str_hash.cpp).
+    void Write(IOutputStream* output) const;
+    // Inserts `s` through the base-class Add(), i.e. insert_copy(), with an empty value.
+    void Add(const char* s) {
+        Hash<yvoid>::Add(s, yvoid());
+    }
+    // Same insertion as Add(); returns the bool half of the insert result.
+    bool AddUniq(const char* s) {
+        return Hash<yvoid>::AddUniq(s, yvoid());
+    }
+    // Inserts through insert() instead of insert_copy(), avoiding allocations -- e.g. in constructors.
+    void AddPermanent(const char* s) {
+        Hash<yvoid>::AddPermanent(s, yvoid());
+    }
+};
+
+// Read-only T -> T lookup table filled once from an array of {key, value} pairs.
+// NOTE: Alloc is kept for interface compatibility but is not forwarded to THashMap.
+template <class T, class HashFcn = THash<T>, class EqualKey = TEqualTo<T>, class Alloc = std::allocator<T>>
+class TStaticHash: private THashMap<T, T, HashFcn, EqualKey> {
+private:
+    using TBase = THashMap<T, T, HashFcn, EqualKey>;
+
+public:
+    // Inserts arr[i][0] -> arr[i][1] for every i in [0, size); `arr` must hold at least `size` rows.
+    TStaticHash(T arr[][2], size_t size) {
+        TBase::reserve(size);
+        for (size_t row = 0; row != size; ++row)
+            TBase::insert(typename TBase::value_type(arr[row][0], arr[row][1]));
+    }
+    // Returns the mapped value by copy (never an lvalue). A missing key yields a value-initialized T:
+    // still nullptr for pointer types, and now well-formed for non-pointer T too.
+    T operator[](const T& key) const {
+        const typename TBase::const_iterator it = TBase::find(key);
+        return it == TBase::end() ? T() : it->second;
+    }
+};
+
+using TStHash = TStaticHash<const char*, ci_hash, ci_equal_to>; // C-string -> C-string table hashed and compared with ci_hash / ci_equal_to
diff --git a/library/cpp/containers/str_hash/ya.make b/library/cpp/containers/str_hash/ya.make
new file mode 100644
index 0000000000..f7e24316b9
--- /dev/null
+++ b/library/cpp/containers/str_hash/ya.make
@@ -0,0 +1,12 @@
+LIBRARY()
+# Modules this library depends on; str_hash.h includes a header from each of them.
+PEERDIR(
+ library/cpp/charset
+ library/cpp/containers/str_map
+)
+# Translation units compiled into the library.
+SRCS(
+ str_hash.cpp
+)
+
+END()
diff --git a/library/cpp/deprecated/CMakeLists.txt b/library/cpp/deprecated/CMakeLists.txt
index ad818e3662..765ea6aad7 100644
--- a/library/cpp/deprecated/CMakeLists.txt
+++ b/library/cpp/deprecated/CMakeLists.txt
@@ -8,6 +8,10 @@
add_subdirectory(accessors)
add_subdirectory(atomic)
+add_subdirectory(autoarray)
+add_subdirectory(datafile)
add_subdirectory(enum_codegen)
+add_subdirectory(fgood)
add_subdirectory(kmp)
+add_subdirectory(mapped_file)
add_subdirectory(split)
diff --git a/library/cpp/deprecated/autoarray/CMakeLists.darwin-x86_64.txt b/library/cpp/deprecated/autoarray/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..f2a246218c
--- /dev/null
+++ b/library/cpp/deprecated/autoarray/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-deprecated-autoarray)  # no STATIC/SHARED keyword, so the library type follows BUILD_SHARED_LIBS
+target_link_libraries(cpp-deprecated-autoarray PUBLIC  # PUBLIC: targets linking this library inherit these dependencies
+ contrib-libs-cxxsupp
+ yutil  # autoarray.h includes several util/ headers; presumably this target provides them -- TODO confirm
+)
+target_sources(cpp-deprecated-autoarray PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/autoarray/autoarray.cpp  # contains only #include "autoarray.h"
+)
diff --git a/library/cpp/deprecated/autoarray/CMakeLists.linux-aarch64.txt b/library/cpp/deprecated/autoarray/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..2411a48cd3
--- /dev/null
+++ b/library/cpp/deprecated/autoarray/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,18 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-deprecated-autoarray)  # no STATIC/SHARED keyword, so the library type follows BUILD_SHARED_LIBS
+target_link_libraries(cpp-deprecated-autoarray PUBLIC  # PUBLIC: targets linking this library inherit these dependencies
+ contrib-libs-linux-headers  # present only in the Linux listings of this library
+ contrib-libs-cxxsupp
+ yutil  # autoarray.h includes several util/ headers; presumably this target provides them -- TODO confirm
+)
+target_sources(cpp-deprecated-autoarray PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/autoarray/autoarray.cpp  # contains only #include "autoarray.h"
+)
diff --git a/library/cpp/deprecated/autoarray/CMakeLists.linux-x86_64.txt b/library/cpp/deprecated/autoarray/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..2411a48cd3
--- /dev/null
+++ b/library/cpp/deprecated/autoarray/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,18 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-deprecated-autoarray)  # no STATIC/SHARED keyword, so the library type follows BUILD_SHARED_LIBS
+target_link_libraries(cpp-deprecated-autoarray PUBLIC  # PUBLIC: targets linking this library inherit these dependencies
+ contrib-libs-linux-headers  # present only in the Linux listings of this library
+ contrib-libs-cxxsupp
+ yutil  # autoarray.h includes several util/ headers; presumably this target provides them -- TODO confirm
+)
+target_sources(cpp-deprecated-autoarray PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/autoarray/autoarray.cpp  # contains only #include "autoarray.h"
+)
diff --git a/library/cpp/deprecated/autoarray/CMakeLists.txt b/library/cpp/deprecated/autoarray/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/library/cpp/deprecated/autoarray/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)  # include the listing generated for the platform being built
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")  # the only branch that does not test HAVE_CUDA
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()  # no else(): when no branch matches, nothing is included and the library target is not defined
diff --git a/library/cpp/deprecated/autoarray/CMakeLists.windows-x86_64.txt b/library/cpp/deprecated/autoarray/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..f2a246218c
--- /dev/null
+++ b/library/cpp/deprecated/autoarray/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-deprecated-autoarray)  # no STATIC/SHARED keyword, so the library type follows BUILD_SHARED_LIBS
+target_link_libraries(cpp-deprecated-autoarray PUBLIC  # PUBLIC: targets linking this library inherit these dependencies
+ contrib-libs-cxxsupp
+ yutil  # autoarray.h includes several util/ headers; presumably this target provides them -- TODO confirm
+)
+target_sources(cpp-deprecated-autoarray PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/autoarray/autoarray.cpp  # contains only #include "autoarray.h"
+)
diff --git a/library/cpp/deprecated/autoarray/README.md b/library/cpp/deprecated/autoarray/README.md
new file mode 100644
index 0000000000..1d83147cee
--- /dev/null
+++ b/library/cpp/deprecated/autoarray/README.md
@@ -0,0 +1,3 @@
+Pre-C++11 vector-like container.
+
+Just use std::vector. If you need to fill your vector with custom-constructed data, use reserve+emplace_back (but make sure that your elements are movable).
diff --git a/library/cpp/deprecated/autoarray/autoarray.cpp b/library/cpp/deprecated/autoarray/autoarray.cpp
new file mode 100644
index 0000000000..15167f27f6
--- /dev/null
+++ b/library/cpp/deprecated/autoarray/autoarray.cpp
@@ -0,0 +1 @@
+#include "autoarray.h"
diff --git a/library/cpp/deprecated/autoarray/autoarray.h b/library/cpp/deprecated/autoarray/autoarray.h
new file mode 100644
index 0000000000..2aa12c5916
--- /dev/null
+++ b/library/cpp/deprecated/autoarray/autoarray.h
@@ -0,0 +1,264 @@
+#pragma once
+
+#include <util/system/compat.h>
+#include <util/system/yassert.h>
+#include <util/system/defaults.h>
+#include <util/system/sys_alloc.h>
+
+#include <util/generic/typetraits.h>
+#include <utility>
+
+#include <new>
+#include <util/generic/noncopyable.h>
+
+struct autoarray_getindex { // tag type: selects the autoarray constructor that builds element i as T(i)
+ autoarray_getindex() = default;
+};
+
+struct aarr_b0 { // tag type: selects the autoarray constructor that zero-fills the buffer with memset instead of running constructors
+ aarr_b0() = default;
+};
+
+struct aarr_nofill { // tag type: selects the autoarray constructor that only allocates and leaves the storage uninitialized
+ aarr_nofill() = default;
+};
+
+template <typename T>
+struct ynd_type_traits { // compile-time facts about T consulted by autoarray
+ enum {
+ empty_destructor = TTypeTraits<T>::IsPod, // when set, ~autoarray skips the per-element destructor calls
+ };
+};
+
+// Fixed-size, non-copyable array of T backed by a buffer from y_allocate()/y_deallocate().
+template <class T>
+class autoarray : TNonCopyable {
+protected:
+    T* arr;       // start of the buffer; nullptr while the array is empty
+    size_t _size; // number of elements the buffer holds
+private:
+    // Allocates raw storage for `siz` elements without constructing any; siz == 0 leaves arr null.
+    void AllocBuf(size_t siz) {
+        arr = nullptr;
+        _size = 0;
+        if (siz > static_cast<size_t>(-1) / sizeof(T))
+            throw std::bad_alloc(); // sizeof(T) * siz would wrap around and under-allocate
+        if (siz) {
+            arr = (T*)y_allocate(sizeof(T) * siz);
+            _size = siz;
+        }
+    }
+
+public:
+    using value_type = T;
+    using iterator = T*;
+    using const_iterator = const T*;
+    // Constructors: when an element constructor throws, the elements already built are destroyed and the buffer is freed.
+    autoarray()
+        : arr(nullptr)
+        , _size(0)
+    {
+    }
+    autoarray(size_t siz) { // siz elements, each built with T()
+        AllocBuf(siz);
+        T* curr = arr;
+        try {
+            for (T* end = arr + _size; curr != end; ++curr)
+                new (curr) T();
+        } catch (...) {
+            while (curr != arr) // unwind newest-first without ever forming arr - 1
+                (--curr)->~T();
+            y_deallocate(arr);
+            throw;
+        }
+    }
+    template <class A>
+    explicit autoarray(size_t siz, A& fill) { // siz elements, each built with T(fill)
+        AllocBuf(siz);
+        T* curr = arr;
+        try {
+            for (T* end = arr + _size; curr != end; ++curr)
+                new (curr) T(fill);
+        } catch (...) {
+            while (curr != arr) // unwind newest-first without ever forming arr - 1
+                (--curr)->~T();
+            y_deallocate(arr);
+            throw;
+        }
+    }
+    explicit autoarray(size_t siz, autoarray_getindex) { // element i is built with T(i)
+        AllocBuf(siz);
+        size_t nCurrent = 0;
+        try {
+            for (nCurrent = 0; nCurrent < _size; ++nCurrent)
+                new (&arr[nCurrent]) T(nCurrent);
+        } catch (...) {
+            for (size_t n = 0; n < nCurrent; ++n)
+                arr[n].~T();
+            y_deallocate(arr);
+            throw;
+        }
+    }
+    explicit autoarray(size_t siz, aarr_b0) { // storage is zero-filled; no constructors run
+        AllocBuf(siz);
+        if (_size) // arr is null for an empty array, and memset must not be handed a null pointer
+            memset(arr, 0, _size * sizeof(T));
+    }
+    explicit autoarray(size_t siz, aarr_nofill) { // storage is left uninitialized
+        AllocBuf(siz);
+    }
+    template <class A>
+    explicit autoarray(const A* fill, size_t siz) { // element i is built with T(fill[i])
+        AllocBuf(siz);
+        size_t nCurrent = 0;
+        try {
+            for (nCurrent = 0; nCurrent < _size; ++nCurrent)
+                new (&arr[nCurrent]) T(fill[nCurrent]);
+        } catch (...) {
+            for (size_t n = 0; n < nCurrent; ++n)
+                arr[n].~T();
+            y_deallocate(arr);
+            throw;
+        }
+    }
+    template <class A, class B>
+    explicit autoarray(const A* fill, const B* cfill, size_t siz) { // element i is built with T(fill[i], cfill)
+        AllocBuf(siz);
+        size_t nCurrent = 0;
+        try {
+            for (nCurrent = 0; nCurrent < _size; ++nCurrent)
+                new (&arr[nCurrent]) T(fill[nCurrent], cfill);
+        } catch (...) {
+            for (size_t n = 0; n < nCurrent; ++n)
+                arr[n].~T();
+            y_deallocate(arr);
+            throw;
+        }
+    }
+    template <class A>
+    explicit autoarray(const A* fill, size_t initsiz, size_t fullsiz) { // first min(initsiz, fullsiz) elements from fill[], the rest T()
+        AllocBuf(fullsiz);
+        size_t nCurrent = 0;
+        try {
+            for (nCurrent = 0; nCurrent < ((initsiz < _size) ? initsiz : _size); ++nCurrent)
+                new (&arr[nCurrent]) T(fill[nCurrent]);
+            for (; nCurrent < _size; ++nCurrent)
+                new (&arr[nCurrent]) T();
+        } catch (...) {
+            for (size_t n = 0; n < nCurrent; ++n)
+                arr[n].~T();
+            y_deallocate(arr);
+            throw;
+        }
+    }
+    template <class A>
+    explicit autoarray(const A* fill, size_t initsiz, size_t fullsiz, const T& dummy) { // first min(initsiz, fullsiz) elements from fill[], the rest T(dummy)
+        AllocBuf(fullsiz);
+        size_t nCurrent = 0;
+        try {
+            for (nCurrent = 0; nCurrent < ((initsiz < _size) ? initsiz : _size); ++nCurrent)
+                new (&arr[nCurrent]) T(fill[nCurrent]);
+            for (; nCurrent < _size; ++nCurrent)
+                new (&arr[nCurrent]) T(dummy);
+        } catch (...) {
+            for (size_t n = 0; n < nCurrent; ++n)
+                arr[n].~T();
+            y_deallocate(arr);
+            throw;
+        }
+    }
+
+    template <class... R>
+    explicit autoarray(size_t siz, R&&... fill) { // siz elements, each built with T(std::forward<R>(fill)...)
+        AllocBuf(siz);
+        T* curr = arr;
+        try {
+            for (T* end = arr + _size; curr != end; ++curr)
+                new (curr) T(std::forward<R>(fill)...);
+        } catch (...) {
+            while (curr != arr) // unwind newest-first without ever forming arr - 1
+                (--curr)->~T();
+            y_deallocate(arr);
+            throw;
+        }
+    }
+    ~autoarray() {
+        if (_size) {
+            if (!ynd_type_traits<T>::empty_destructor) // skipped when the trait reports an empty destructor
+                for (T *curr = arr, *end = arr + _size; curr != end; ++curr)
+                    curr->~T();
+            y_deallocate(arr);
+        }
+    }
+    T& operator[](size_t pos) { // the index is checked by Y_ASSERT only
+        Y_ASSERT(pos < _size);
+        return arr[pos];
+    }
+    const T& operator[](size_t pos) const {
+        Y_ASSERT(pos < _size);
+        return arr[pos];
+    }
+    size_t size() const {
+        return _size;
+    }
+    void swap(autoarray& with) { // exchanges buffer pointers and sizes; no element is copied
+        std::swap(arr, with.arr);
+        std::swap(_size, with._size);
+    }
+    // resize(): builds a new siz-element array (copies of the first min(size(), siz) elements, then T() or T(dummy)) and swaps it in.
+    void resize(size_t siz) {
+        autoarray<T> tmp(arr, _size, siz);
+        swap(tmp);
+    }
+    void resize(size_t siz, const T& dummy) {
+        autoarray<T> tmp(arr, _size, siz, dummy);
+        swap(tmp);
+    }
+    T* rawpointer() {
+        return arr;
+    }
+    const T* operator~() const { // operator shorthand for the raw buffer pointer
+        return arr;
+    }
+    T* begin() {
+        return arr;
+    }
+    T* end() {
+        return arr + _size;
+    }
+    T& back() {
+        Y_ASSERT(_size);
+        return arr[_size - 1];
+    }
+    bool empty() const {
+        return !_size;
+    }
+    bool operator!() const { // true when the array is empty
+        return !_size;
+    }
+    size_t operator+() const { // operator shorthand for size()
+        return _size;
+    }
+    const T* begin() const {
+        return arr;
+    }
+    const T* end() const {
+        return arr + _size;
+    }
+    const T& back() const {
+        Y_ASSERT(_size);
+        return arr[_size - 1];
+    }
+};
+
+// Element-wise equality: the sizes must match and no pair of corresponding
+// elements may compare unequal through operator!=.
+template <class T>
+inline bool operator==(const autoarray<T>& a, const autoarray<T>& b) {
+    if (a.size() != b.size())
+        return false;
+    for (const T *lhs = a.begin(), *rhs = b.begin(); lhs != a.end(); ++lhs, ++rhs)
+        if (*lhs != *rhs)
+            return false;
+    return true;
+}
diff --git a/library/cpp/deprecated/autoarray/ya.make b/library/cpp/deprecated/autoarray/ya.make
new file mode 100644
index 0000000000..4b055f8c29
--- /dev/null
+++ b/library/cpp/deprecated/autoarray/ya.make
@@ -0,0 +1,7 @@
+LIBRARY()
+# autoarray.cpp only includes autoarray.h; the container itself lives entirely in the header.
+SRCS(
+ autoarray.cpp
+)
+
+END()
diff --git a/library/cpp/deprecated/datafile/CMakeLists.darwin-x86_64.txt b/library/cpp/deprecated/datafile/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..3f88f788da
--- /dev/null
+++ b/library/cpp/deprecated/datafile/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-deprecated-datafile)  # no STATIC/SHARED keyword, so the library type follows BUILD_SHARED_LIBS
+target_link_libraries(cpp-deprecated-datafile PUBLIC  # PUBLIC: targets linking this library inherit these dependencies
+ contrib-libs-cxxsupp
+ yutil
+ cpp-deprecated-mapped_file  # presumably the target of the sibling mapped_file directory added to library/cpp/deprecated -- TODO confirm
+)
+target_sources(cpp-deprecated-datafile PRIVATE  # translation units compiled into this library
+ ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/datafile/datafile.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/datafile/loadmode.cpp
+)
diff --git a/library/cpp/deprecated/datafile/CMakeLists.linux-aarch64.txt b/library/cpp/deprecated/datafile/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..43da9ae45a
--- /dev/null
+++ b/library/cpp/deprecated/datafile/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+ # Library target for library/cpp/deprecated/datafile (linux-aarch64 variant).
+ add_library(cpp-deprecated-datafile)
+ # Dependencies are PUBLIC, so they also apply to targets that link this library.
+ # Unlike the darwin/windows variants, this one also links contrib-libs-linux-headers.
+ target_link_libraries(cpp-deprecated-datafile PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ cpp-deprecated-mapped_file
+ )
+ # Sources are addressed relative to the top-level source tree.
+ target_sources(cpp-deprecated-datafile PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/datafile/datafile.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/datafile/loadmode.cpp
+ )
diff --git a/library/cpp/deprecated/datafile/CMakeLists.linux-x86_64.txt b/library/cpp/deprecated/datafile/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..43da9ae45a
--- /dev/null
+++ b/library/cpp/deprecated/datafile/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+ # Library target for library/cpp/deprecated/datafile (linux-x86_64 variant).
+ add_library(cpp-deprecated-datafile)
+ # Dependencies are PUBLIC, so they also apply to targets that link this library.
+ # Unlike the darwin/windows variants, this one also links contrib-libs-linux-headers.
+ target_link_libraries(cpp-deprecated-datafile PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ cpp-deprecated-mapped_file
+ )
+ # Sources are addressed relative to the top-level source tree.
+ target_sources(cpp-deprecated-datafile PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/datafile/datafile.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/datafile/loadmode.cpp
+ )
diff --git a/library/cpp/deprecated/datafile/CMakeLists.txt b/library/cpp/deprecated/datafile/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/library/cpp/deprecated/datafile/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+ # Pick the platform-specific target definition. At most one branch is taken; when none
+ # matches (other OS/CPU, or HAVE_CUDA set on a branch that excludes it) nothing is included
+ # and this directory defines no target. Only the Darwin branch does not test HAVE_CUDA.
+ if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+ elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+ elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+ elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+ endif()
diff --git a/library/cpp/deprecated/datafile/CMakeLists.windows-x86_64.txt b/library/cpp/deprecated/datafile/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..3f88f788da
--- /dev/null
+++ b/library/cpp/deprecated/datafile/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+ # Library target for library/cpp/deprecated/datafile (windows-x86_64 variant).
+ add_library(cpp-deprecated-datafile)
+ # Dependencies are PUBLIC, so they also apply to targets that link this library.
+ target_link_libraries(cpp-deprecated-datafile PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ cpp-deprecated-mapped_file
+ )
+ # Sources are addressed relative to the top-level source tree.
+ target_sources(cpp-deprecated-datafile PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/datafile/datafile.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/datafile/loadmode.cpp
+ )
diff --git a/library/cpp/deprecated/datafile/README.md b/library/cpp/deprecated/datafile/README.md
new file mode 100644
index 0000000000..7f8547108e
--- /dev/null
+++ b/library/cpp/deprecated/datafile/README.md
@@ -0,0 +1,3 @@
+A wrapper on top of some user-defined custom file format.
+
+Just write your own if you need it. It's going to be way easier than figuring out how to use this one.
diff --git a/library/cpp/deprecated/datafile/datafile.cpp b/library/cpp/deprecated/datafile/datafile.cpp
new file mode 100644
index 0000000000..ff93f11c6b
--- /dev/null
+++ b/library/cpp/deprecated/datafile/datafile.cpp
@@ -0,0 +1,42 @@
+#include "datafile.h"
+
+ // Loads the whole file `fname`. No header is passed on, so the four-argument
+ // overload takes Length from the file size.
+ void TDataFileBase::DoLoad(const char* fname, int loadMode) {
+     Destroy();
+     TFile source(fname, RdOnly);
+     void* const noHeader = nullptr;
+     DoLoad(source, loadMode, noHeader, 0);
+ }
+
+ // Makes the contents of `f` available through Start/Length, either by reading them
+ // into MemData (load type DLM_READ) or by memory-mapping the file (any other load type).
+ //
+ //  f        - open file to load from
+ //  loadMode - EDataLoadMode bits; DLM_LD_TYPE_MASK selects the load type and
+ //             DLM_EXACT_SIZE requests the file-size check
+ //  hdrPtr   - header bytes the caller has already read from `f`, or nullptr. When a
+ //             header is given, Length must already hold the expected size; otherwise
+ //             Length is set to the file length.
+ //  hdrSize  - number of bytes at hdrPtr (ignored when hdrPtr is nullptr)
+ //
+ // Throws yexception when the file size contradicts Length.
+ void TDataFileBase::DoLoad(TFile& f, int loadMode, void* hdrPtr, size_t hdrSize) {
+     if (hdrPtr) {
+         if (loadMode & DLM_EXACT_SIZE && f.GetLength() != (i64)Length)
+             throw yexception() << f.GetName() << " size does not match its header value";
+     } else {
+         Length = f.GetLength();
+         hdrSize = 0;
+     }
+     if ((loadMode & DLM_LD_TYPE_MASK) == DLM_READ) {
+         // A declared length shorter than the header itself would overflow MemData in the
+         // memcpy below and make Length - hdrSize wrap around.
+         if (Length < hdrSize)
+             throw yexception() << f.GetName() << " header declares a size smaller than the header itself";
+         MemData = TVector<char>(Length);
+         // memcpy with a null source is undefined even for zero bytes, so skip it when
+         // there is no header.
+         if (hdrSize)
+             memcpy(MemData.begin(), hdrPtr, hdrSize);
+         f.Load(MemData.begin() + hdrSize, Length - hdrSize);
+         Start = MemData.begin();
+     } else {
+         FileData.init(f);
+         if (FileData.getSize() < Length)
+             throw yexception() << f.GetName() << " is smaller than what its header value says";
+         if ((loadMode & DLM_LD_TYPE_MASK) == DLM_MMAP_PRC)
+             FileData.precharge();
+         Start = (const char*)FileData.getData();
+     }
+ }
+
+ // Releases both possible backing stores and resets the object to the empty state.
+ void TDataFileBase::Destroy() {
+     {
+         // Swapping with a scoped empty vector frees MemData's storage right here.
+         TVector<char> released;
+         released.swap(MemData);
+     }
+     FileData.term();
+     Start = nullptr;
+     Length = 0;
+ }
+
+ // Precharges the mapping, but only when data is loaded and Start points at the
+ // mapped file rather than at MemData.
+ void TDataFileBase::Precharge() const {
+     if (!Length)
+         return;
+     const bool servedFromMapping = (Start == (char*)FileData.getData());
+     if (servedFromMapping)
+         FileData.precharge();
+ }
diff --git a/library/cpp/deprecated/datafile/datafile.h b/library/cpp/deprecated/datafile/datafile.h
new file mode 100644
index 0000000000..a438baceca
--- /dev/null
+++ b/library/cpp/deprecated/datafile/datafile.h
@@ -0,0 +1,88 @@
+#pragma once
+
+#include "loadmode.h"
+
+#include <library/cpp/deprecated/mapped_file/mapped_file.h>
+
+#include <util/generic/vector.h>
+#include <util/system/file.h>
+#include <util/system/filemap.h>
+
+/** Simple helper that allows a file to be either mapped or read into malloc'ed memory.
+ This behaviour is controlled by EDataLoadMode enum defined in loadmode.h.
+ Unlike TBlob it provides Precharge() function and simple file size - based integrity check.
+
+ To use this code, inherit your class from TDataFile<TFileHeader>.
+ TFileHeader must be a pod-type structure with byte layout of the file header.
+ File must start with that header.
+ TFileHeader must have FileSize() member function that determines expected file size or
+ length of data that need to be read from the beginning of file.
+ */
+
+ // Non-template base of TDataFile: owns the loaded bytes (heap copy or mapping) and
+ // exposes them to derived classes through Start/Length.
+ class TDataFileBase {
+ protected:
+ // Heap copy of the file; filled only by the DLM_READ load type.
+ TVector<char> MemData;
+ // Memory mapping of the file; used by every other load type.
+ TMappedFile FileData;
+
+ // First byte of the loaded data (points into MemData or FileData), nullptr when empty.
+ const char* Start;
+ // Number of valid bytes at Start.
+ size_t Length;
+
+ TDataFileBase()
+ : Start(nullptr)
+ , Length(0)
+ {
+ }
+
+ // Loads from an open file; hdrPtr/hdrSize describe header bytes already read by the caller.
+ void DoLoad(TFile& f, int loadMode, void* hdrPtr, size_t hdrSize);
+ void DoLoad(const char* fname, int loadMode); // just whole file
+ // Frees MemData, terminates FileData and resets Start/Length.
+ void Destroy();
+ // Exchanges the complete state with another instance.
+ void swap(TDataFileBase& with) {
+ MemData.swap(with.MemData);
+ FileData.swap(with.FileData);
+ DoSwap(Start, with.Start);
+ DoSwap(Length, with.Length);
+ }
+
+ public:
+ // Precharges the mapping when the data is served from FileData; otherwise does nothing.
+ void Precharge() const;
+ };
+
+ // Data file that starts with a TFileHeader; the header's FileSize() supplies the
+ // expected data length.
+ template <class TFileHeader>
+ class TDataFile: public TDataFileBase {
+ protected:
+     // Reads the header from `fname`, takes Length from it and hands the rest of the
+     // loading over to TDataFileBase::DoLoad.
+     void Load(const char* fname, EDataLoadMode loadMode) {
+         Destroy();
+         TFile input(fname, RdOnly | Seq);
+         TFileHeader header;
+         input.Load(&header, sizeof(header));
+         Length = header.FileSize();
+         DoLoad(input, static_cast<int>(loadMode), &header, sizeof(header));
+     }
+     // Views the beginning of the loaded data as the header.
+     const TFileHeader& Hdr() const {
+         const TFileHeader* header = (const TFileHeader*)Start;
+         return *header;
+     }
+ };
+
+ // Use: class TFoo: public TDataFileEx<TFoo, TFooHeader> {...};
+ // Additional requirement: TFileHeader must have a Validate(fname) function that throws an exception.
+// Class TUser itself must have Init(fname) function
+// Adds Load() function to your class (TUser)
+ // CRTP-style helper: TUser derives from TDataFileEx<TUser, TFileHeader> and gets a
+ // public Load() that validates the header and then calls TUser::Init(fname).
+ template <class TUser, class TFileHeader>
+ class TDataFileEx: public TDataFile<TFileHeader> {
+ private:
+     using TBase = TDataFile<TFileHeader>;
+     // Views this object as the derived user class.
+     TUser& User() const {
+         TUser* self = (TUser*)this;
+         return *self;
+     }
+
+ public:
+     // A null fname constructs an empty object; otherwise loads immediately.
+     TDataFileEx(const char* fname, EDataLoadMode loadMode = DLM_DEFAULT) {
+         if (!fname)
+             return;
+         Load(fname, loadMode);
+     }
+     // Loads the file, lets the header validate itself, then initializes the user class.
+     void Load(const char* fname, EDataLoadMode loadMode = DLM_DEFAULT) {
+         TBase::Load(fname, loadMode);
+         const TFileHeader& header = TBase::Hdr();
+         header.Validate(fname);
+         User().Init(fname);
+     }
+ };
diff --git a/library/cpp/deprecated/datafile/loadmode.cpp b/library/cpp/deprecated/datafile/loadmode.cpp
new file mode 100644
index 0000000000..a857830326
--- /dev/null
+++ b/library/cpp/deprecated/datafile/loadmode.cpp
@@ -0,0 +1 @@
+#include "loadmode.h"
diff --git a/library/cpp/deprecated/datafile/loadmode.h b/library/cpp/deprecated/datafile/loadmode.h
new file mode 100644
index 0000000000..f04054dd64
--- /dev/null
+++ b/library/cpp/deprecated/datafile/loadmode.h
@@ -0,0 +1,20 @@
+#pragma once
+
+ // It is recommended to support all reasonable value combinations via this enum,
+// to let Load() function argument be of EDataLoadMode type, not just int type
+
+ // Load type (low bits, see DLM_LD_TYPE_MASK) optionally combined with the
+ // DLM_EXACT_SIZE flag.
+ enum EDataLoadMode {
+ DLM_READ = 0, // read the file into heap memory
+ DLM_MMAP_PRC = 1, // map the file and precharge it
+ DLM_MMAP = 2, // map the file w/o precharge
+ DLM_MMAP_AUTO_PRC = 3, // precharge automatically (same as DLM_MMAP unless specifically supported)
+ DLM_LD_TYPE_MASK = 15, // mask that extracts the load type from a mode value
+ DLM_EXACT_SIZE = 16, // fail if the input file size differs from what the header says
+
+ // Each load type combined with the exact-size check.
+ DLM_READ_ESZ = DLM_READ | DLM_EXACT_SIZE,
+ DLM_MMAP_PRC_ESZ = DLM_MMAP_PRC | DLM_EXACT_SIZE,
+ DLM_MMAP_ESZ = DLM_MMAP | DLM_EXACT_SIZE,
+ DLM_MMAP_APRC_ESZ = DLM_MMAP_AUTO_PRC | DLM_EXACT_SIZE,
+
+ DLM_DEFAULT = DLM_MMAP_PRC_ESZ,
+ };
diff --git a/library/cpp/deprecated/datafile/ya.make b/library/cpp/deprecated/datafile/ya.make
new file mode 100644
index 0000000000..1ad4fe9bc7
--- /dev/null
+++ b/library/cpp/deprecated/datafile/ya.make
@@ -0,0 +1,12 @@
+ # Library module for the datafile helpers; depends on the mapped_file library.
+ LIBRARY()
+
+ SRCS(
+ datafile.cpp
+ loadmode.cpp
+ )
+
+ PEERDIR(
+ library/cpp/deprecated/mapped_file
+ )
+
+ END()
diff --git a/library/cpp/deprecated/fgood/CMakeLists.darwin-x86_64.txt b/library/cpp/deprecated/fgood/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..a82750e559
--- /dev/null
+++ b/library/cpp/deprecated/fgood/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,18 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+ # Library target for library/cpp/deprecated/fgood (darwin-x86_64 variant).
+ add_library(cpp-deprecated-fgood)
+ # Dependencies are PUBLIC, so they also apply to targets that link this library.
+ target_link_libraries(cpp-deprecated-fgood PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ )
+ # Sources are addressed relative to the top-level source tree.
+ target_sources(cpp-deprecated-fgood PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/fgood/ffb.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/fgood/fgood.cpp
+ )
diff --git a/library/cpp/deprecated/fgood/CMakeLists.linux-aarch64.txt b/library/cpp/deprecated/fgood/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..52e29348fd
--- /dev/null
+++ b/library/cpp/deprecated/fgood/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+ # Library target for library/cpp/deprecated/fgood (linux-aarch64 variant).
+ add_library(cpp-deprecated-fgood)
+ # Dependencies are PUBLIC, so they also apply to targets that link this library.
+ # Unlike the darwin/windows variants, this one also links contrib-libs-linux-headers.
+ target_link_libraries(cpp-deprecated-fgood PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ )
+ # Sources are addressed relative to the top-level source tree.
+ target_sources(cpp-deprecated-fgood PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/fgood/ffb.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/fgood/fgood.cpp
+ )
diff --git a/library/cpp/deprecated/fgood/CMakeLists.linux-x86_64.txt b/library/cpp/deprecated/fgood/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..52e29348fd
--- /dev/null
+++ b/library/cpp/deprecated/fgood/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+ # Library target for library/cpp/deprecated/fgood (linux-x86_64 variant).
+ add_library(cpp-deprecated-fgood)
+ # Dependencies are PUBLIC, so they also apply to targets that link this library.
+ # Unlike the darwin/windows variants, this one also links contrib-libs-linux-headers.
+ target_link_libraries(cpp-deprecated-fgood PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ )
+ # Sources are addressed relative to the top-level source tree.
+ target_sources(cpp-deprecated-fgood PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/fgood/ffb.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/fgood/fgood.cpp
+ )
diff --git a/library/cpp/deprecated/fgood/CMakeLists.txt b/library/cpp/deprecated/fgood/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/library/cpp/deprecated/fgood/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+ # Pick the platform-specific target definition. At most one branch is taken; when none
+ # matches (other OS/CPU, or HAVE_CUDA set on a branch that excludes it) nothing is included
+ # and this directory defines no target. Only the Darwin branch does not test HAVE_CUDA.
+ if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+ elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+ elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+ elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+ endif()
diff --git a/library/cpp/deprecated/fgood/CMakeLists.windows-x86_64.txt b/library/cpp/deprecated/fgood/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..a82750e559
--- /dev/null
+++ b/library/cpp/deprecated/fgood/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,18 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+ # Library target for library/cpp/deprecated/fgood (windows-x86_64 variant).
+ add_library(cpp-deprecated-fgood)
+ # Dependencies are PUBLIC, so they also apply to targets that link this library.
+ target_link_libraries(cpp-deprecated-fgood PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ )
+ # Sources are addressed relative to the top-level source tree.
+ target_sources(cpp-deprecated-fgood PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/fgood/ffb.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/fgood/fgood.cpp
+ )
diff --git a/library/cpp/deprecated/fgood/README.md b/library/cpp/deprecated/fgood/README.md
new file mode 100644
index 0000000000..4f66289657
--- /dev/null
+++ b/library/cpp/deprecated/fgood/README.md
@@ -0,0 +1,15 @@
+ Some ancient wrappers on top of FILE*, and some string manipulation functions.
+
+Alternatives are as follows.
+
+For TFILEPtr. Use TIFStream or TOFStream if you need IO. For some rare use cases a TFileMap might also do.
+
+For fput/fget/getline. Use streams API.
+
+For struct ffb and struct prnstr. Just don't use them. Even if you can figure out what they do.
+
+For sf family of functions and TLineSplitter. Just use Split* from util/string/split.h
+
+For TSFReader. Use TMapTsvFile.
+
+For read_or_die family of functions. Use streams API.
diff --git a/library/cpp/deprecated/fgood/ffb.cpp b/library/cpp/deprecated/fgood/ffb.cpp
new file mode 100644
index 0000000000..aa9da861a6
--- /dev/null
+++ b/library/cpp/deprecated/fgood/ffb.cpp
@@ -0,0 +1,407 @@
+#include "ffb.h"
+
+#include <util/string/util.h> // str_spn
+#include <util/system/compat.h>
+#include <util/generic/yexception.h>
+
+#include <cstdio>
+#include <algorithm>
+
+#include <ctype.h>
+
+#ifdef _win_
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+
+ // Takes `file` and, unless it is a terminal or BUFSIZ is already at least 512 KiB,
+ // switches it to a fully buffered 512 KiB stdio buffer.
+ ffb::ffb(FILE* file)
+     : TFILEPtr(file)
+ {
+     const bool wantsBigBuffer = file && !isatty(fileno(file)) && BUFSIZ < 512 * 1024;
+     if (wantsBigBuffer) {
+         setvbuf(file, nullptr, _IOFBF, 512 * 1024);
+     }
+ }
+
+ // Assigns `f` through TFILEPtr and applies the same 512 KiB buffering policy as the
+ // FILE* constructor.
+ void ffb::operator=(FILE* f) {
+     TFILEPtr::operator=(f);
+     if (!f)
+         return;
+     if (isatty(fileno(f)))
+         return;
+     if (BUFSIZ < 512 * 1024)
+         setvbuf(f, nullptr, _IOFBF, 512 * 1024);
+ }
+
+ // Opens `name` through TFILEPtr::open and enlarges the stdio buffer to 512 KiB
+ // when the stream is not a terminal and BUFSIZ is smaller than that.
+ void ffb::open(const char* name, const char* mode) {
+     TFILEPtr::open(name, mode);
+     const bool isTerminal = isatty(fileno(*this));
+     if (!isTerminal && BUFSIZ < 512 * 1024) {
+         setvbuf(*this, nullptr, _IOFBF, 512 * 1024);
+     }
+ }
+
+ // Splits a tab-delimited line in place: every '\t' becomes a terminator and fb[]
+ // receives the field starts followed by nullptr. Scanning stops at NUL, at '\n'
+ // (a preceding '\r' is cut as well) or once 31 fields have been stored.
+ // Returns the number of fields; 0 for an empty line, with fb[0] == nullptr.
+ int sf(char** fb, char* buf) { //don't want to call sf(fb, buf, 32)
+     const int maxFields = 31;
+     if (*buf == 0 || *buf == 10) {
+         *fb = nullptr;
+         return 0;
+     }
+     fb[0] = buf;
+     int count = 1;
+     for (; *buf && *buf != 10 && count < maxFields; ++buf) {
+         if (*buf == '\t') {
+             *buf = 0;
+             fb[count++] = buf + 1;
+         }
+     }
+     if (*buf == 10 && buf[-1] == 13)
+         buf[-1] = 0;
+     // Terminates the line at the stop position, wherever the scan ended.
+     *buf = 0;
+     fb[count] = nullptr;
+     return count;
+ }
+
+ // Tab-delimited split with a caller-supplied capacity: fields are stored only while
+ // fewer than fb_sz - 1 have been found; fb[] is nullptr-terminated.
+ // Scanning stops at NUL or '\n' (a preceding '\r' is cut as well).
+ // Returns the number of fields; 0 for an empty line, with fb[0] == nullptr.
+ int sf(char** fb, char* buf, size_t fb_sz) {
+     if (*buf == 0 || *buf == 10) {
+         *fb = nullptr;
+         return 0;
+     }
+     const int limit = (int)(fb_sz - 1);
+     fb[0] = buf;
+     int count = 1;
+     for (; *buf && *buf != 10 && count < limit; ++buf) {
+         if (*buf == '\t') {
+             *buf = 0;
+             fb[count++] = buf + 1;
+         }
+     }
+     if (*buf == 10 && buf[-1] == 13)
+         buf[-1] = 0;
+     // Terminates the line at the stop position, wherever the scan ended.
+     *buf = 0;
+     fb[count] = nullptr;
+     return count;
+ }
+
+ // Whitespace-delimited split: leading whitespace is skipped and a run of whitespace
+ // counts as one separator. The line is split in place and fb[] is nullptr-terminated.
+ // Returns the number of fields; 0 when the line holds only whitespace.
+ inline int sf_blank(char** fb, char* buf, size_t fb_sz) {
+     for (; isspace((ui8)*buf); ++buf) {
+     }
+     if (*buf == 0) {
+         *fb = nullptr;
+         return 0;
+     }
+     const int limit = (int)(fb_sz - 1);
+     fb[0] = buf;
+     int count = 1;
+     while (*buf && *buf != 10 && count < limit) {
+         if (!isspace((ui8)*buf)) {
+             ++buf;
+             continue;
+         }
+         // End the current field, then swallow the rest of the whitespace run.
+         *buf++ = 0;
+         while (isspace((ui8)*buf))
+             ++buf;
+         // Trailing whitespace does not open a new field.
+         if (*buf)
+             fb[count++] = buf;
+     }
+     if (*buf == 10 && buf[-1] == 13)
+         buf[-1] = 0;
+     *buf = 0;
+     fb[count] = nullptr;
+     return count;
+ }
+
+ // Splits a line on the single character `fs`, in place. Leading separators are
+ // skipped and repeated separators count as one. fs == ' ' delegates to sf_blank().
+ // Scanning stops at NUL or '\n' (a preceding '\r' is cut as well), or once
+ // fb_sz - 1 fields have been stored. fb[] is nullptr-terminated.
+ int sf(char fs, char** fb, char* buf, size_t fb_sz) {
+     if (fs == ' ')
+         return sf_blank(fb, buf, fb_sz);
+     for (; *buf == fs; ++buf) {
+     }
+     if (*buf == 0 || *buf == 10) {
+         *fb = nullptr;
+         return 0;
+     }
+     const int limit = (int)(fb_sz - 1);
+     fb[0] = buf;
+     int count = 1;
+     while (*buf && *buf != 10 && count < limit) {
+         if (*buf != fs) {
+             ++buf;
+             continue;
+         }
+         // End the current field, skip the separator run, start the next field.
+         *buf++ = 0;
+         while (*buf == fs)
+             ++buf;
+         fb[count++] = buf;
+     }
+     if (*buf == 10 && buf[-1] == 13)
+         buf[-1] = 0;
+     *buf = 0;
+     fb[count] = nullptr;
+     return count;
+ }
+
+ // Splits a line on the exact string `fs` (not a regexp), in place: the first
+ // character of each separator occurrence becomes a terminator. Scanning stops at
+ // NUL or '\n' (a preceding '\r' is cut as well), or once fb_sz - 1 fields have been
+ // stored. fb[] is nullptr-terminated.
+ // Returns the number of fields; 0 for an empty line, with fb[0] == nullptr.
+ int sf(const char* fs, char** fb, char* buf, size_t fb_sz) {
+     if (*buf == 0 || *buf == 10) {
+         *fb = nullptr;
+         return 0;
+     }
+     const int fs_len = strlen(fs);
+     const int limit = (int)(fb_sz - 1);
+     fb[0] = buf;
+     int count = 1;
+     while (*buf && *buf != 10 && count < limit) {
+         // An empty separator never matches.
+         const bool atSeparator = fs_len > 0 && !strncmp(buf, fs, fs_len);
+         if (!atSeparator) {
+             ++buf;
+             continue;
+         }
+         *buf = 0;
+         buf += fs_len;
+         fb[count++] = buf;
+     }
+     if (*buf == 10 && buf[-1] == 13)
+         buf[-1] = 0;
+     *buf = 0;
+     fb[count] = nullptr;
+     return count;
+ }
+
+ // True for a null pointer or a pointer to the terminating NUL.
+ inline bool is_end(const char* p) {
+     if (p == nullptr)
+         return true;
+     return *p == '\0';
+ }
+
+ // Splits `buf` in place on any character from `seps`. Leading delimiters are
+ // skipped and runs of delimiters count as one. Up to fb_sz field starts are stored
+ // in fb[], followed by nullptr. Returns the number of fields.
+ int sf(const char* seps, char* buf, char** fb, size_t fb_sz) {
+     if (fb_sz < 1 || is_end(buf)) {
+         *fb = nullptr;
+         return 0;
+     }
+     str_spn sseps(seps);
+     fb[0] = nullptr;
+     // skip leading delimiters
+     buf = sseps.cbrk(buf);
+     if (is_end(buf))
+         return 0;
+     const int capacity = (int)fb_sz;
+     int count = 0;
+     for (; count < capacity;) {
+         // store the field
+         fb[count++] = buf;
+         // find the delimiter that ends it
+         buf = sseps.brk(buf + 1);
+         if (is_end(buf))
+             break;
+         *buf = 0;
+         // skip the following delimiters
+         buf = sseps.cbrk(buf + 1);
+         if (is_end(buf))
+             break;
+     }
+     fb[count] = nullptr;
+     return count;
+ }
+
+ // Splits the zero-terminated string `p` in place and appends a pointer to the start
+ // of every field to `fields`. Each separator occurrence is overwritten with NUL.
+ // Empty fields are kept unless SkipEmpty() is set. A null or empty `p` adds nothing.
+ //
+ // A field is emitted only at a complete separator or at the end of the string.
+ // Previously it was judged at the first character of the separator set, so with a
+ // multi-character separator and SkipEmpty() a field that merely started with the
+ // separator's first character was dropped.
+ void TLineSplitter::operator()(char* p, TVector<char*>& fields) const {
+     if (!p || !*p)
+         return;
+     char* fieldStart = p;
+     while (1) {
+         p = Sep.brk(p);
+         if (*p) {
+             const bool fullSeparator = SepStrLen == 1 || (SepStrLen > 1 && !strncmp(p + 1, SepStr + 1, SepStrLen - 1));
+             if (!fullSeparator) {
+                 // Only the first separator character matched: still inside the field.
+                 p++;
+                 continue;
+             }
+         }
+         if (p - fieldStart || !SkipEmpty())
+             fields.push_back(fieldStart);
+         if (!*p)
+             break;
+         *p = 0;
+         p += SepStrLen;
+         fieldStart = p;
+     }
+ }
+
+ // Const variant of the splitter: appends a (pointer, length) pair for every field of
+ // the zero-terminated string `p`, without modifying it. Empty fields are kept unless
+ // SkipEmpty() is set. A null or empty `p` adds nothing.
+ //
+ // The field length is measured up to a complete separator or the end of the string.
+ // Previously it was measured at the first character of the separator set, so with a
+ // multi-character separator such as "@@" the input "a@b@@c" produced "a", not "a@b".
+ void TLineSplitter::operator()(const char* p, TVector<std::pair<const char*, size_t>>& fields) const {
+     if (!p || !*p)
+         return;
+     const char* fieldStart = p;
+     while (1) {
+         p = Sep.brk(p);
+         if (*p) {
+             const bool fullSeparator = SepStrLen == 1 || (SepStrLen > 1 && !strncmp(p + 1, SepStr + 1, SepStrLen - 1));
+             if (!fullSeparator) {
+                 // Only the first separator character matched: still inside the field.
+                 p++;
+                 continue;
+             }
+         }
+         if (p - fieldStart || !SkipEmpty())
+             fields.push_back(std::make_pair(fieldStart, p - fieldStart));
+         if (!*p)
+             break;
+         p += SepStrLen;
+         fieldStart = p;
+     }
+ }
+
+ // Single-character separator. For sep == ' ' the splitter is built from the whitespace
+ // set "\t\n\v\f\r " with empty fields skipped; otherwise from the one character `sep`.
+ // The temporary TString lives only until the end of the Split initializer expression.
+ TSFReader::TSFReader(const char* fname, char sep, i32 nfrq) // if sep == ' ' isspace will be imitated (for compat)
+ : Split(str_spn(sep == ' ' ? "\t\n\v\f\r " : TString(1, sep).data()), sep == ' ')
+ , OpenPipe(false)
+ {
+ Open(fname, nfrq);
+ }
+
+ // Exact-string separator; empty fields are kept. `sep` is passed straight to
+ // TLineSplitter, which keeps the pointer.
+ TSFReader::TSFReader(const char* fname, const char* sep, i32 nfrq)
+ : Split(sep, false)
+ , OpenPipe(false)
+ {
+ Open(fname, nfrq);
+ }
+
+ // Caller-supplied splitter, copied into the reader.
+ TSFReader::TSFReader(const char* fname, const TLineSplitter& spl, i32 nfrq)
+ : Split(spl)
+ , OpenPipe(false)
+ {
+ Open(fname, nfrq);
+ }
+
+ // (Re)opens the reader on `fname` and resets the line and field counters.
+ //
+ //  fname     - file to read; "/dev/stdin" selects stdin; nullptr only closes the
+ //              currently open file, if any
+ //  nfrq      - required number of fields per line, or -1 for no check
+ //  vbuf_size - stdio buffer size applied when the stream is not a terminal
+ //
+ // When called through Popen(), `fname` is run through popen() instead.
+ void TSFReader::Open(const char* fname, i32 nfrq, size_t vbuf_size) {
+     // OpenPipe is a one-shot request set by Popen(). Consume it first, so that an early
+     // return (null fname) or an exception while opening cannot leave it set and make a
+     // later plain Open() run its file name through popen().
+     const bool usePipe = OpenPipe;
+     OpenPipe = false;
+
+     FieldsRequired = nfrq;
+     NF = NR = 0;
+
+     if (IsOpen())
+         File.close();
+
+     if (!fname)
+         return;
+
+     if (!strcmp(fname, "/dev/stdin")) {
+         File.assign(stdin, "/dev/stdin");
+     } else if (usePipe) {
+         File.popen(fname, "r");
+     } else {
+         File.open(fname, "r");
+     }
+     if (!isatty(fileno(File)))
+         setvbuf(File, nullptr, _IOFBF, vbuf_size);
+ }
+
+ // Like Open(), but asks Open() to run `pname` through popen() instead of opening it
+ // as a file.
+ void TSFReader::Popen(const char* pname, i32 nfrq, size_t vbuf_size) {
+     this->OpenPipe = true; // read and reset by Open()
+     this->Open(pname, nfrq, vbuf_size);
+ }
+
+ // Reads the next line from File, strips the line ending, splits it with Split and
+ // stores the result in Fields/NF; NR is incremented.
+ // When `pool` is given, the line is first copied into the pool and the fields point
+ // into that copy.
+ // Returns false when no more lines can be read.
+ // Throws yexception when FieldsRequired != -1 and the field count differs from it.
+ bool TSFReader::NextLine(segmented_string_pool* pool) {
+ size_t line_len = 0;
+
+ #ifdef __FreeBSD__
+ // fgetln() returns a pointer into the stdio buffer; the line is not NUL-terminated.
+ char* ptr = fgetln(File, &line_len);
+ if (!ptr)
+ return false;
+ if (!line_len || ptr[line_len - 1] != '\n') { // last line w/o newline
+ // No newline to overwrite with NUL, so copy the line into Buf instead.
+ Buf.AssignNoAlias(ptr, line_len);
+ ptr = Buf.begin();
+ } else {
+ // can safely replace newline with \0
+ ptr[line_len - 1] = 0;
+ --line_len;
+ }
+ #else
+ if (!getline(File, Buf))
+ return false;
+ char* ptr = Buf.begin();
+ line_len = Buf.size();
+ #endif
+ // Drop a trailing '\r' left over from a CRLF line ending.
+ if (line_len && ptr[line_len - 1] == '\r')
+ ptr[line_len - 1] = 0;
+
+ if (pool) {
+ char* nptr = pool->append(ptr);
+ Y_ASSERT(!strcmp(ptr, nptr));
+ ptr = nptr;
+ }
+
+ ++NR;
+ Fields.clear();
+ Split(ptr, Fields);
+ NF = Fields.size();
+
+ if (FieldsRequired != -1 && FieldsRequired != (int)NF)
+ ythrow yexception() << File.name() << " line " << NR << ": " << NF << " fields, expected " << FieldsRequired;
+
+ return true;
+ }
+
+ // printf-style append at position `pos`, growing the buffer by doubling until the
+ // formatted text fits.
+ // Returns the number of characters appended, or the negative value reported by
+ // vsnprintf on a formatting error; in that case `pos` is left unchanged.
+ int prnstr::f(const char* c, ...) {
+     va_list params;
+     int n = asize - pos, k;
+     va_start(params, c);
+     while ((k = vsnprintf(buf + pos, n, c, params)) >= n) {
+         // n tracks the free space after pos for the doubled capacity.
+         n += asize, asize *= 2;
+         while (k + pos >= n)
+             n += asize, asize *= 2;
+         char* t = new char[asize];
+         memcpy(t, buf, pos);
+         delete[] buf;
+         buf = t;
+         // The argument list was consumed by vsnprintf; restart it for the retry.
+         va_end(params);
+         va_start(params, c);
+     }
+     va_end(params);
+     // Adding a negative result to pos would move the write position backwards,
+     // possibly before the start of the buffer.
+     if (k < 0)
+         return k;
+     pos += k;
+     return k;
+ }
+ // Appends `k` bytes from `c` and NUL-terminates the buffer, doubling its capacity
+ // first when the free space is not larger than `k`.
+ // Returns k, or 0 when `c` is null.
+ int prnstr::s(const char* c, size_t k) {
+     if (c == nullptr)
+         return 0;
+     size_t room = asize - pos;
+     if (k >= room) {
+         // Double at least once, then keep doubling until the growth test passes.
+         do {
+             room += asize;
+             asize *= 2;
+         } while (k + pos >= room);
+         char* grown = new char[asize];
+         memcpy(grown, buf, pos);
+         delete[] buf;
+         buf = grown;
+     }
+     memcpy(buf + pos, c, k);
+     pos += k;
+     buf[pos] = 0;
+     return k;
+ }
+ // Resets the write position and shrinks a buffer larger than 32768 bytes back to
+ // 32768. The buffer contents are not preserved and not NUL-terminated.
+ void prnstr::clear() {
+     pos = 0;
+     if (asize > 32768) {
+         // Allocate before releasing: if new throws, buf/asize still describe the old,
+         // valid buffer instead of leaving buf dangling for the destructor to delete again.
+         char* shrunk = new char[32768];
+         delete[] buf;
+         buf = shrunk;
+         asize = 32768;
+     }
+ }
+
+ // Exchanges buffer, capacity and write position with `w`.
+ void prnstr::swap(prnstr& w) {
+     using std::swap;
+     swap(asize, w.asize);
+     swap(pos, w.pos);
+     swap(buf, w.buf);
+ }
+
+ // Opens `fname` for binary reading; on failure calls err(1, ...) with the file name.
+ FILE* read_or_die(const char* fname) {
+     FILE* const stream = fopen(fname, "rb");
+     if (stream == nullptr) {
+         err(1, "%s", fname);
+     }
+     return stream;
+ }
+ // Opens `fname` for binary writing; on failure calls err(1, ...) with the file name.
+ FILE* write_or_die(const char* fname) {
+     FILE* const stream = fopen(fname, "wb");
+     if (stream == nullptr) {
+         err(1, "%s", fname);
+     }
+     return stream;
+ }
+ // Opens `fname` with the given mode; on failure calls err(1, ...) with the file name
+ // and the mode.
+ FILE* fopen_or_die(const char* fname, const char* mode) {
+     FILE* const stream = fopen(fname, mode);
+     if (stream == nullptr) {
+         err(1, "%s (mode '%s')", fname, mode);
+     }
+     return stream;
+ }
+
+ // Opens `fname` with the given mode; throws yexception carrying the file name, the
+ // mode and the last system error text on failure.
+ FILE* fopen_chk(const char* fname, const char* mode) {
+     FILE* const stream = fopen(fname, mode);
+     if (stream != nullptr) {
+         return stream;
+     }
+     ythrow yexception() << fname << " (mode '" << mode << "'): " << LastSystemErrorText();
+ }
+
+void fclose_chk(FILE* f, const char* fname) {
+ if (fclose(f))
+ ythrow yexception() << "file " << fname << ": " << LastSystemErrorText();
+}
diff --git a/library/cpp/deprecated/fgood/ffb.h b/library/cpp/deprecated/fgood/ffb.h
new file mode 100644
index 0000000000..ca229eb65a
--- /dev/null
+++ b/library/cpp/deprecated/fgood/ffb.h
@@ -0,0 +1,264 @@
+#pragma once
+
+#include "fgood.h"
+
+#include <util/string/util.h> // str_spn
+#include <util/string/split.h> // str_spn
+#include <util/memory/segmented_string_pool.h>
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+#include <util/generic/noncopyable.h>
+
+#include <utility>
+
+#include <cstdarg>
+#include <cstring>
+
+ // TFILEPtr with short-named output helpers. The FILE* constructor, operator= and
+ // open() are defined in ffb.cpp.
+ struct ffb: public TFILEPtr {
+     ffb() {
+     }
+     ffb(FILE* file);
+     ffb(const char* name, const char* mode) {
+         open(name, mode);
+     }
+     void operator=(FILE* f); // take ownership
+     void open(const char* name, const char* mode);
+     // printf-style formatted output; returns the vfprintf result.
+     int f(const char* c, ...) {
+         va_list args;
+         va_start(args, c);
+         const int written = vfprintf(*this, c, args);
+         // Every va_start must be matched by va_end before the function returns.
+         va_end(args);
+         return written;
+     }
+     // Writes a zero-terminated string.
+     void s(const char* c) {
+         fsput(c, strlen(c));
+     }
+     // Writes n raw bytes.
+     void b(const void* cc, int n) {
+         fsput((const char*)cc, n);
+     }
+     // Same as b().
+     void B(const void* cc, int N) {
+         fsput((const char*)cc, N);
+     }
+     // Writes a single character.
+     void c(char c) {
+         fputc(c);
+     }
+     void cbe(wchar16 c) { // big endian utf-16
+         fputc(char(c >> 8)); //Hi8
+         fputc(char(c & 255)); //Lo8
+     }
+     // Writes a zero-terminated wchar16 string, each unit through cbe().
+     void sbe(const wchar16* c) {
+         for (; *c; c++)
+             cbe(*c);
+     }
+     void fclose() {
+         close();
+     }
+ };
+
+// split fields of tab-delimited line of text
+// here and below fb actual size must be fb_sz + 1 to allow fb[fb_sz] be zero
+int sf(char** fb, char* buf, size_t fb_sz);
+int sf(char** fb, char* buf /* fb_sz == 32 */);
+
+// split fields of char-delimited line of text
+// Achtung: delim = ' ' imitates awk: initial separators are skipped,
+// repeated seps treated as one, all chars less than ' ' treated as separators.
+int sf(char fs, char** fb, char* buf, size_t fb_sz = 32);
+
+// split fields of string-delimited line of text (fs is NOT a regexp)
+// (usually fs is "@@")
+int sf(const char* fs, char** fb, char* buf, size_t fb_sz = 32);
+
+// split fields of char-delimited line of text, set of char-separators is given
+// Achtung: repeated seps treated as one, initial seps are skipped
+// newlines are NOT ignored.
+int sf(const char* seps, char* buf, char** fb, size_t fb_sz = 32);
+
+ // Removes one trailing '\n' from `buf` in place; on _win32_ builds a '\r' in front
+ // of it is removed as well. Returns `buf`.
+ inline char* chomp(char* buf) {
+     size_t len = strlen(buf);
+     if (len == 0 || buf[len - 1] != '\n')
+         return buf;
+     buf[--len] = 0;
+ #ifdef _win32_
+     if (len > 0 && buf[len - 1] == '\r')
+         buf[--len] = 0;
+ #endif
+     return buf;
+ }
+
+ // Removes one trailing '\n' and then one trailing '\r' from `buf` in place, each only
+ // if present. Returns `buf`.
+ inline char* chomp_cr(char* buf) {
+     size_t len = strlen(buf);
+     if (len > 0 && buf[len - 1] == '\n')
+         buf[--len] = 0;
+     if (len > 0 && buf[len - 1] == '\r')
+         buf[--len] = 0;
+     return buf;
+ }
+
+ // Splits a zero-terminated line into fields, either on one exact separator string or
+ // on any character from a set.
+ class TLineSplitter {
+ protected:
+ enum { // Default: Split string by SepStr
+ SplitByAnySep = 1, // Split string by Sep
+ NoEmptyFields = 2 // Skip all empty fields between separators
+ };
+
+ private:
+ ui32 Flags;
+ const str_spn Sep; // collection of separators
+ const char* SepStr; // pointer exact string to separate by
+ size_t SepStrLen; // length of separator string
+
+ public:
+ // Exact-string mode. Sep holds only the first character of `sep`, used to find
+ // candidate positions. The `sep` pointer itself is stored, not copied, so the string
+ // must outlive the splitter.
+ TLineSplitter(const char* sep, bool noEmpty)
+ : Flags(noEmpty ? NoEmptyFields : 0)
+ , Sep(TString(sep, 1).data())
+ , SepStr(sep)
+ , SepStrLen(strlen(sep))
+ {
+ }
+ // Any-character mode: every character in `sep` is a one-character separator.
+ TLineSplitter(const str_spn& sep, bool noEmpty = false)
+ : Flags(SplitByAnySep | (noEmpty ? NoEmptyFields : 0))
+ , Sep(sep)
+ , SepStr(nullptr)
+ , SepStrLen(1)
+ {
+ }
+ bool AnySep() const {
+ return Flags & SplitByAnySep;
+ }
+ bool SkipEmpty() const {
+ return Flags & NoEmptyFields;
+ }
+ /// Separates string onto tokens
+ /// Expecting a zero-terminated string
+ /// By default returns empty fields between sequential separators
+ void operator()(char* p, TVector<char*>& fields) const;
+ /// Same, but for const string - fills vector of pairs (pointer, length)
+ void operator()(const char* p, TVector<std::pair<const char*, size_t>>& fields) const;
+ };
+
+/**
+ * Use library/cpp/map_text_file/map_tsv_file.h instead.
+ */
+ // Line-by-line reader of separator-delimited text: NextLine() reads a line and splits
+ // it into fields that are then accessible through operator[].
+ class TSFReader {
+ TString Buf; // buffer used for non-'\n'-terminated string and for non-freebsd work
+ TLineSplitter Split;
+ TVector<char*> Fields;
+ size_t NF; // Fields.size()
+ size_t NR; // number of lines read so far
+
+ TFILEPtr File;
+
+ bool OpenPipe; // internal flag that turns open() to popen()
+
+ i32 FieldsRequired; // if != -1 and != NF, NextLine() throws yexception
+
+ public:
+ // char separator
+ // Attention: delim = ' ' imitates awk: initial separators are skipped,
+ // all chars less than ' ' treated as separators.
+ TSFReader(const char* fname = nullptr, char sep = '\t', i32 nf_reqired = -1);
+ // exact string separator
+ TSFReader(const char* fname, const char* sep, i32 nf_reqired = -1);
+ // fully customizable
+ TSFReader(const char* fname, const TLineSplitter& spl, i32 nf_reqired = -1);
+
+ void Open(const char* fname, i32 nf_reqired = -1, size_t vbufsize = 1u << 21); // use "/dev/stdin" for stdin
+ void Popen(const char* pname, i32 nf_reqired = -1, size_t vbufsize = 1u << 21);
+
+ // Reads and splits the next line; returns false when nothing more can be read.
+ bool NextLine(segmented_string_pool* pool = nullptr);
+
+ bool IsOpen() const {
+ return (FILE*)File != nullptr;
+ }
+ bool IsEof() const {
+ return feof(File);
+ }
+ void Close() {
+ File.close();
+ }
+ void Rewind() {
+ File.seek(0, SEEK_SET);
+ }
+ void Seek(i64 offset, int mode = SEEK_SET) {
+ File.seek(offset, mode);
+ }
+ i64 Tell() const {
+ return ftell(File);
+ }
+ // Unchecked access: `ind` is not compared against NF here.
+ char*& operator[](size_t ind) {
+ //if (ind >= NF)
+ // throw yexception("Can't return reference to unexisting field %" PRISZT, ind);
+ return Fields[ind];
+ }
+ // Checked access: returns nullptr for an index past the last field.
+ const char* operator[](size_t ind) const {
+ if (ind >= NF)
+ return nullptr;
+ return Fields[ind];
+ }
+ operator int() const { // note: empty input line makes 0 fields
+ return (int)NF;
+ }
+ const char* Name() const {
+ return File.name().data();
+ }
+ // Number of lines read since the last Open().
+ size_t Line() const {
+ return NR;
+ }
+ const TVector<char*>& GetFields() const {
+ return Fields;
+ }
+ };
+
+ // Growable character buffer with printf-like and append helpers.
+ // NOTE(review): buf is an owning raw pointer released in the destructor, and no copy
+ // constructor or copy assignment is declared here, so copying a prnstr would make two
+ // objects delete the same buffer. Confirm that no caller copies it.
+ struct prnstr {
+ char* buf; // owned storage of `asize` bytes
+ int pos; // current write position (length of the accumulated text)
+ int asize; // allocated size of buf
+ // Starts with a 32-byte buffer; the contents are left uninitialized.
+ prnstr()
+ : pos(0)
+ {
+ asize = 32;
+ buf = new char[asize];
+ }
+ // Starts with a buffer of `asz` bytes; the contents are left uninitialized.
+ explicit prnstr(int asz)
+ : pos(0)
+ {
+ asize = asz;
+ buf = new char[asize];
+ }
+ int f(const char* c, ...);
+ int s(const char* c1, const char* c2);
+ int s(const char* c1, const char* c2, const char* c3);
+ int s(const char* c, size_t len);
+ //int s(const char *c);
+ // Appends a zero-terminated string; a null pointer appends nothing and returns 0.
+ int s(const char* c) {
+ return c ? s(c, strlen(c)) : 0;
+ }
+ int s(const TString& c);
+ int s_htmesc(const char* c, bool enc_utf = false);
+ int s_htmesc_w(const char* c);
+ int c(char c);
+ int cu(wchar32 c); //for utf-8
+ // Empties the text but keeps the current allocation.
+ void restart() {
+ *buf = 0;
+ pos = 0;
+ }
+ // Pointer to the accumulated text.
+ const char* operator~() const {
+ return buf;
+ }
+ // Current length (write position).
+ int operator+() const {
+ return pos;
+ }
+ ~prnstr() {
+ delete[] buf;
+ }
+ void clear();
+ void swap(prnstr& w);
+ };
+
+// functions that terminate program upon failure
+FILE* read_or_die(const char* fname);
+FILE* write_or_die(const char* fname);
+FILE* fopen_or_die(const char* fname, const char* mode);
+
+// functions that throw upon failure
+FILE* fopen_chk(const char* fname, const char* mode);
+void fclose_chk(FILE* f, const char* fname_dbg);
diff --git a/library/cpp/deprecated/fgood/fgood.cpp b/library/cpp/deprecated/fgood/fgood.cpp
new file mode 100644
index 0000000000..5d4725bfae
--- /dev/null
+++ b/library/cpp/deprecated/fgood/fgood.cpp
@@ -0,0 +1,70 @@
+#include "fgood.h"
+
+#include <util/generic/cast.h>
+#include <util/string/cast.h>
+#include <util/system/fstat.h>
+
+#ifdef _win32_
+#include <io.h>
+#endif
+
+// Returns the size in bytes of the file behind m_file, as reported by
+// GetFileLength(). Throws yexception carrying the stored name and the last
+// system error text when GetFileLength() reports a negative value.
+i64 TFILEPtr::length() const {
+    // GetFileLength() takes an FHANDLE: on Windows the CRT descriptor has to be
+    // converted with _get_osfhandle(), elsewhere the descriptor is used as is.
+#ifdef _win32_
+    const FHANDLE handle = (FHANDLE)_get_osfhandle(fileno(m_file));
+#else
+    const FHANDLE handle = fileno(m_file);
+#endif
+    const i64 size = GetFileLength(handle);
+    if (size >= 0) {
+        return size;
+    }
+    ythrow yexception() << "TFILEPtr::length() " << Name.data() << ": " << LastSystemErrorText();
+}
+
+// Opens name with the given stdio mode and returns the stream; the caller
+// receives the raw FILE*. Throws yexception with the name, mode and last
+// system error text when fopen() fails, so the result is never null.
+FILE* OpenFILEOrFail(const TString& name, const char* mode) {
+    if (FILE* const stream = ::fopen(name.data(), mode)) {
+        return stream;
+    }
+    ythrow yexception() << "can't open \'" << name << "\' with mode \'" << mode << "\': " << LastSystemErrorText();
+}
+
+// Deleter hook: closes file with fclose(). The return value of fclose() is
+// ignored, so a failed close is not reported.
+void TFILECloser::Destroy(FILE* file) {
+ ::fclose(file);
+}
+
+#ifdef _freebsd_ // fgetln
+#define getline getline_alt_4test
+#endif // _freebsd_
+
+// Reads one line from f into s, dropping the terminating '\n' if there is one.
+// Returns false only when the very first fgets() yields nothing; once any data
+// has been read the function returns true, even when the input ends without a
+// trailing newline.
+// The fgets() calls below take three arguments and therefore resolve to the C
+// library function; f is passed through TFILEPtr's conversion to FILE*.
+bool getline(TFILEPtr& f, TString& s) {
+ char buf[4096];
+ char* buf_ptr;
+ // If s already owns more storage than the stack buffer, read the first
+ // chunk straight into s; otherwise read it into buf.
+ if (s.capacity() > sizeof(buf)) {
+ s.resize(s.capacity());
+ if ((buf_ptr = fgets(s.begin(), IntegerCast<int>(s.capacity()), f)) == nullptr)
+ return false;
+ } else {
+ if ((buf_ptr = fgets(buf, sizeof(buf), f)) == nullptr)
+ return false;
+ }
+ // The chunk length is measured with strlen(), so an embedded NUL byte ends
+ // the chunk early.
+ size_t buf_len = strlen(buf_ptr);
+ bool line_complete = buf_len && buf_ptr[buf_len - 1] == '\n';
+ if (line_complete)
+ buf_len--;
+ // In-place read: just trim s to the data length. Otherwise copy from buf.
+ if (buf_ptr == s.begin())
+ s.resize(buf_len);
+ else
+ s.AssignNoAlias(buf, buf_len);
+ if (line_complete)
+ return true;
+ // The line did not fit into the first chunk: keep appending buf-sized
+ // pieces until a newline is seen or fgets() stops returning data.
+ while (fgets(buf, sizeof(buf), f)) {
+ size_t buf_len2 = strlen(buf);
+ if (buf_len2 && buf[buf_len2 - 1] == '\n') {
+ buf[buf_len2 - 1] = 0;
+ s.append(buf, buf_len2 - 1);
+ return true;
+ }
+ s.append(buf, buf_len2);
+ }
+ return true;
+}
diff --git a/library/cpp/deprecated/fgood/fgood.h b/library/cpp/deprecated/fgood/fgood.h
new file mode 100644
index 0000000000..0aaf910c0f
--- /dev/null
+++ b/library/cpp/deprecated/fgood/fgood.h
@@ -0,0 +1,328 @@
+#pragma once
+
+#include <util/system/yassert.h>
+#include <util/system/defaults.h>
+#include <util/generic/string.h>
+#include <util/generic/yexception.h>
+#include <util/generic/ptr.h>
+
+#include "fput.h"
+
+#include <cstdio>
+
+#include <fcntl.h>
+
+#ifdef _unix_
+extern "C" int __ungetc(int, FILE*);
+#endif
+
+#if (!defined(__FreeBSD__) && !defined(__linux__) && !defined(_darwin_) && !defined(_cygwin_)) || defined(_bionic_)
+#define feof_unlocked(_stream) feof(_stream)
+#define ferror_unlocked(_stream) ferror(_stream)
+#endif
+
+#ifndef _unix_
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+#define getc_unlocked(_stream) (--(_stream)->_cnt >= 0 ? 0xff & *(_stream)->_ptr++ : _filbuf(_stream))
+#define putc_unlocked(_c, _stream) (--(_stream)->_cnt >= 0 ? 0xff & (*(_stream)->_ptr++ = (char)(_c)) : _flsbuf((_c), (_stream)))
+#else
+#define getc_unlocked(_stream) getc(_stream)
+#define putc_unlocked(_c, _stream) putc(_c, _stream)
+#endif
+#endif
+
+// True when the stream is neither at end-of-file nor in an error state.
+// f must not be null: it is passed straight to feof_unlocked/ferror_unlocked.
+inline bool fgood(FILE* f) {
+ return !feof_unlocked(f) && !ferror_unlocked(f);
+}
+
+#ifdef _win32_
+// These functions will work only with static MSVC runtime linkage. For dynamic linkage,
+// fseeki64.c and ftelli64.c from CRT sources should be included in project
+extern "C" int __cdecl _fseeki64(FILE*, __int64, int);
+extern "C" __int64 __cdecl _ftelli64(FILE*);
+
+// Windows stand-in for POSIX ftello(): forwards to _ftelli64().
+inline i64 ftello(FILE* stream) {
+ return _ftelli64(stream);
+}
+
+// Windows stand-in for POSIX fseeko(): forwards to _fseeki64() and returns
+// its result unchanged.
+inline int fseeko(FILE* stream, i64 offset, int origin) {
+ return _fseeki64(stream, offset, origin);
+}
+#endif
+
+// Thin wrapper around a C stdio FILE* that remembers the name it was opened
+// with and reports stdio failures as yexception.
+//
+// Ownership is tracked by the SHOULD_CLOSE flag. Streams obtained through
+// open()/popen(), the FILE* constructor, operator=(FILE*) or assign() are
+// closed by close() and by the destructor. A TFILEPtr copied from another
+// TFILEPtr shares the FILE* without owning it and does not copy the name.
+class TFILEPtr {
+private:
+    enum { SHOULD_CLOSE = 1, // close() must release m_file
+           IS_PIPE = 2 };    // m_file came from popen(): release with pclose()
+    FILE* m_file;
+    int m_Flags;
+    TString Name;
+
+public:
+    TFILEPtr() noexcept {
+        m_file = nullptr;
+        m_Flags = 0;
+    }
+    // Opens name with the given mode; throws yexception on failure (see open()).
+    TFILEPtr(const TString& name, const char* mode) {
+        m_file = nullptr;
+        m_Flags = 0;
+        open(name, mode);
+    }
+    // Non-owning copy: shares src's FILE*, clears all flags, leaves Name empty.
+    TFILEPtr(const TFILEPtr& src) noexcept {
+        m_file = src.m_file;
+        m_Flags = 0;
+    }
+    // Non-owning copy assignment; closes the current stream first when it
+    // differs from src's. Name is cleared by close() and not copied from src.
+    TFILEPtr& operator=(const TFILEPtr& src) {
+        if (src.m_file != m_file) {
+            close();
+            m_file = src.m_file;
+            m_Flags = 0;
+        }
+        return *this;
+    }
+    explicit TFILEPtr(FILE* f) noexcept { // take ownership
+        m_file = f;
+        m_Flags = SHOULD_CLOSE;
+    }
+    TFILEPtr& operator=(FILE* f) { // take ownership
+        if (f != m_file) {
+            close();
+            m_file = f;
+            m_Flags = SHOULD_CLOSE;
+        }
+        return *this;
+    }
+    const TString& name() const {
+        return Name;
+    }
+    operator FILE*() const noexcept {
+        return m_file;
+    }
+    FILE* operator->() const noexcept {
+        return m_file;
+    }
+    bool operator!() const noexcept {
+        return m_file == nullptr;
+    }
+    bool operator!=(FILE* f) const noexcept {
+        return m_file != f;
+    }
+    bool operator==(FILE* f) const noexcept {
+        return m_file == f;
+    }
+    // May throw if closing an owned stream fails and no other exception is in
+    // flight (see close()).
+    ~TFILEPtr() {
+        close();
+    }
+    // Throws yexception built from the printf-style message when the stream is
+    // at end-of-file or in an error state (see fgood()); otherwise does nothing.
+    void Y_PRINTF_FORMAT(2, 3) check(const char* message, ...) const {
+        if (Y_UNLIKELY(!fgood(m_file))) {
+            va_list args;
+            va_start(args, message);
+            char buf[512];
+            vsnprintf(buf, sizeof(buf), message, args);
+            va_end(args); // va_start must be matched by va_end before leaving the function
+            // XXX: errno is undefined here
+            ythrow yexception() << buf << ": " << LastSystemErrorText() << ", " << Name.data() << " at offset " << (i64)ftell();
+        }
+    }
+    TFILEPtr& assign(FILE* f, const char* name = nullptr) { // take ownership and have a name
+        *this = f;
+        if (name)
+            Name = name;
+        return *this;
+    }
+    // Opens name with fopen(). The object must not already hold a stream.
+    // Throws yexception when fopen() fails; on success the stream is owned.
+    void open(const TString& name, const char* mode) {
+        Y_ASSERT(!name.empty());
+        Y_ASSERT(m_file == nullptr);
+        m_file = ::fopen(name.data(), mode);
+        if (!m_file)
+            ythrow yexception() << "can't open \'" << name << "\' with mode \'" << mode << "\': " << LastSystemErrorText();
+        m_Flags = SHOULD_CLOSE;
+        Name = name;
+    }
+    // Starts command with popen(); the command line becomes the stored name.
+    // Throws yexception when popen() fails.
+    void popen(const TString& command, const char* mode) {
+        Y_ASSERT(!command.empty());
+        Y_ASSERT(m_file == nullptr);
+        m_file = ::popen(command.data(), mode);
+        if (!m_file)
+            ythrow yexception() << "can't execute \'" << command << "\' with mode \'" << mode << "\': " << LastSystemErrorText();
+        m_Flags = IS_PIPE | SHOULD_CLOSE;
+        Name = command;
+    }
+    // Releases the stream if it is owned (pclose() for pipes, fclose()
+    // otherwise) and resets the object to the empty state. A non-zero result
+    // from pclose()/fclose() is reported as yexception unless another exception
+    // is already propagating.
+    void close() {
+        if (m_file != nullptr && (m_Flags & SHOULD_CLOSE)) {
+            if ((m_Flags & IS_PIPE) ? ::pclose(m_file) : ::fclose(m_file)) {
+                // Forget the stream before throwing so it is never closed twice.
+                m_file = nullptr;
+                m_Flags = 0;
+                if (!UncaughtException())
+                    ythrow yexception() << "can't close file " << Name.data() << ": " << LastSystemErrorText();
+            }
+        }
+        m_file = nullptr;
+        m_Flags = 0;
+        Name.clear();
+    }
+    // fwrite() wrapper; throws through check() if the stream is not good afterwards.
+    size_t write(const void* buffer, size_t size, size_t count) const {
+        Y_ASSERT(m_file != nullptr);
+        size_t r = ::fwrite(buffer, size, count, m_file);
+        // Cast the whole product: "%lu" expects unsigned long, whereas
+        // (unsigned long)size * count has type size_t where size_t is wider.
+        check("can't write %lu bytes", (unsigned long)(size * count));
+        return r;
+    }
+    // fread() wrapper; throws on a stream error. A short read at end-of-file is
+    // not an error and is visible only through the returned count.
+    size_t read(void* buffer, size_t size, size_t count) const {
+        Y_ASSERT(m_file != nullptr);
+        size_t r = ::fread(buffer, size, count, m_file);
+        if (ferror_unlocked(m_file))
+            ythrow yexception() << "can't read " << (unsigned long)size * count << " bytes: " << LastSystemErrorText() << ", " << Name.data() << " at offset " << (i64)ftell();
+        return r;
+    }
+    // fgets() wrapper; returns what fgets() returns (nullptr when nothing was
+    // read) and throws on a stream error.
+    char* fgets(char* buffer, int size) const {
+        Y_ASSERT(m_file != nullptr);
+        char* r = ::fgets(buffer, size, m_file);
+        if (ferror_unlocked(m_file))
+            ythrow yexception() << "can't read string of maximum size " << size << ": " << LastSystemErrorText() << ", " << Name.data() << " at offset " << (i64)ftell();
+        return r;
+    }
+    // Formatted output via vfprintf(); throws through check() if the stream is
+    // not good afterwards.
+    void Y_PRINTF_FORMAT(2, 3) fprintf(const char* format, ...) {
+        Y_ASSERT(m_file != nullptr);
+        va_list args;
+        va_start(args, format);
+        vfprintf(m_file, format, args);
+        va_end(args); // release the argument list before check() can throw
+        check("can't write");
+    }
+    // Repositions the stream; throws yexception when the seek fails.
+    void seek(i64 offset, int origin) const {
+        Y_ASSERT(m_file != nullptr);
+#if defined(_unix_) || defined(_win32_)
+        if (fseeko(m_file, offset, origin) != 0)
+#else
+        Y_ASSERT(offset == (i64)(i32)offset);
+        if (::fseek(m_file, (long)offset, origin) != 0)
+#endif
+            ythrow yexception() << "can't seek " << Name.data() << " by " << offset << ": " << LastSystemErrorText();
+    }
+    i64 length() const; // uses various system headers -> defined in fgood.cpp
+
+    // Adds O_DIRECT to the status flags of the underlying descriptor.
+    // Compiled to a no-op when _win_ or _darwin_ is defined.
+    void setDirect() const {
+#if !defined(_win_) && !defined(_darwin_)
+        if (!m_file)
+            ythrow yexception() << "file not open";
+        // F_SETFL replaces the whole set of status flags, so read the current
+        // ones first; otherwise flags such as O_APPEND or O_NONBLOCK set when
+        // the file was opened would be silently cleared.
+        const int fd = fileno(m_file);
+        const int flags = fcntl(fd, F_GETFL);
+        if (flags == -1 || fcntl(fd, F_SETFL, flags | O_DIRECT) == -1)
+            ythrow yexception() << "Cannot set O_DIRECT flag";
+#endif
+    }
+
+    // for convenience
+
+    // Current stream position as a 64-bit offset where the platform provides one.
+    i64 ftell() const noexcept {
+#if defined(_unix_) || defined(_win32_)
+        return ftello(m_file);
+#else
+        // Qualified call: an unqualified ftell(m_file) would find this
+        // zero-argument member instead of the C library function.
+        return ::ftell(m_file);
+#endif
+    }
+    bool eof() const noexcept {
+        Y_ASSERT(m_file != nullptr);
+        return feof_unlocked(m_file) != 0;
+    }
+    int fputc(int c) {
+        Y_ASSERT(m_file != nullptr);
+        return putc_unlocked(c, m_file);
+    }
+    // Writes the C string as one item of strlen(buffer) bytes, so the result
+    // is the item count reported by fwrite(), not a byte count.
+    size_t fputs(const char* buffer) const {
+        return write(buffer, strlen(buffer), 1);
+    }
+    int fgetc() {
+        Y_ASSERT(m_file != nullptr);
+        return getc_unlocked(m_file);
+    }
+    int ungetc(int c) {
+        Y_ASSERT(m_file != nullptr);
+        return ::ungetc(c, m_file);
+    }
+    // Raw binary write of one object of type T (see ::fput in fput.h).
+    template <class T>
+    size_t fput(const T& a) {
+        Y_ASSERT(m_file != nullptr);
+        return ::fput(m_file, a);
+    }
+    // Raw binary read of one object of type T (see ::fget in fput.h).
+    template <class T>
+    size_t fget(T& a) {
+        Y_ASSERT(m_file != nullptr);
+        return ::fget(m_file, a);
+    }
+    size_t fsput(const char* s, size_t l) {
+        Y_ASSERT(m_file != nullptr);
+        return ::fsput(m_file, s, l);
+    }
+    size_t fsget(char* s, size_t l) {
+        Y_ASSERT(m_file != nullptr);
+        return ::fsget(m_file, s, l);
+    }
+
+    void fflush() {
+        ::fflush(m_file);
+    }
+
+    /* This block contains some TFile/TStream - compatible names */
+    // Reads up to numBytes bytes and returns the number actually read; throws
+    // on a stream error.
+    size_t Read(void* bufferIn, size_t numBytes) {
+        size_t r = fsget((char*)bufferIn, numBytes);
+        if (Y_UNLIKELY(ferror_unlocked(m_file)))
+            ythrow yexception() << "can't read " << numBytes << " bytes: " << LastSystemErrorText() << ", " << Name << " at offset " << (i64)ftell();
+        return r;
+    }
+    void Write(const void* buffer, size_t numBytes) {
+        write(buffer, 1, numBytes);
+    }
+    // Seeks and returns the resulting position.
+    i64 Seek(i64 offset, int origin /*SeekDir*/) {
+        seek(offset, origin);
+        return ftell();
+    }
+    i64 GetPosition() const noexcept {
+        return ftell();
+    }
+    // NOTE(review): length() throws on failure while this wrapper is declared
+    // noexcept, so such a failure terminates the program - confirm this is intended.
+    i64 GetLength() const noexcept {
+        return length();
+    }
+    bool ReadLine(TString& st);
+
+    /* Similar to TAutoPtr::Release - return pointer and forget about it. */
+    FILE* Release() noexcept {
+        FILE* result = m_file;
+        m_file = nullptr;
+        m_Flags = 0;
+        Name.clear();
+        return result;
+    }
+};
+
+// Free-function spelling of F.close(), so TFILEPtr can be used where C code
+// calls fclose().
+inline void fclose(TFILEPtr& F) {
+ F.close();
+}
+
+// Free-function spelling of F.seek(). Unlike C fseek() it returns nothing;
+// TFILEPtr::seek() throws yexception when the seek fails.
+inline void fseek(const TFILEPtr& F, i64 offset, int whence) {
+ F.seek(offset, whence);
+}
+
+#ifdef _freebsd_ // fgetln
+// FreeBSD variant built on fgetln(): reads one line from f into s without the
+// trailing '\n'. Returns false when fgetln() yields no data.
+inline bool getline(TFILEPtr& f, TString& s) {
+ size_t len;
+ char* buf = fgetln(f, &len);
+ if (!buf)
+ return false;
+ // fgetln() reports the length explicitly; drop a final newline if present.
+ if (len && buf[len - 1] == '\n')
+ len--;
+ s.AssignNoAlias(buf, len);
+ return true;
+}
+#else
+bool getline(TFILEPtr& f, TString& s);
+#endif //_freebsd_
+
+// Member spelling of getline(*this, st): reads one line into st and returns
+// whatever getline() returns.
+inline bool TFILEPtr::ReadLine(TString& st) {
+ return getline(*this, st);
+}
+
+FILE* OpenFILEOrFail(const TString& name, const char* mode);
+
+//Should be used with THolder
+// Deleter policy whose Destroy() closes a FILE* (defined in fgood.cpp); it is
+// the second template argument of TFILEHolder below.
+struct TFILECloser {
+ static void Destroy(FILE* file);
+};
+
+using TFILEHolder = THolder<FILE, TFILECloser>;
diff --git a/library/cpp/deprecated/fgood/fput.h b/library/cpp/deprecated/fgood/fput.h
new file mode 100644
index 0000000000..690b06332d
--- /dev/null
+++ b/library/cpp/deprecated/fgood/fput.h
@@ -0,0 +1,79 @@
+#pragma once
+
+#include <util/system/defaults.h>
+#include <util/system/valgrind.h>
+
+#include <cstdio>
+
+#ifdef __FreeBSD__
+#include <cstring>
+
+// FreeBSD fast path: writes the raw bytes of a. When the FILE's write counter
+// _w covers sizeof(a), the bytes are copied directly to the buffer cursor _p,
+// which is then advanced, and 1 is returned; otherwise falls back to fwrite()
+// and returns its result.
+template <class T>
+Y_FORCE_INLINE size_t fput(FILE* F, const T& a) {
+ if (Y_LIKELY(F->_w >= int(sizeof(a)))) {
+ memcpy(F->_p, &a, sizeof(a));
+ F->_p += sizeof(a);
+ F->_w -= sizeof(a);
+ return 1;
+ } else {
+ return fwrite(&a, sizeof(a), 1, F);
+ }
+}
+
+// FreeBSD fast path: reads the raw bytes of a. When the FILE's read counter _r
+// covers sizeof(a), the bytes are copied directly from the buffer cursor _p,
+// which is then advanced, and 1 is returned; otherwise falls back to fread()
+// and returns its result.
+template <class T>
+Y_FORCE_INLINE size_t fget(FILE* F, T& a) {
+ if (Y_LIKELY(F->_r >= int(sizeof(a)))) {
+ memcpy(&a, F->_p, sizeof(a));
+ F->_p += sizeof(a);
+ F->_r -= sizeof(a);
+ return 1;
+ } else {
+ return fread(&a, sizeof(a), 1, F);
+ }
+}
+
+// FreeBSD fast path: writes l bytes from s and returns the number of bytes
+// written. Copies straight into the FILE's buffer when it has room, otherwise
+// falls back to fwrite().
+inline size_t fsput(FILE* F, const char* s, size_t l) {
+    VALGRIND_CHECK_READABLE(s, l);
+
+    // _w is a signed counter that FreeBSD's inline putc lets drop below zero on
+    // line-buffered streams. Casting a negative value to size_t gives a huge
+    // number that passes the size test and lets memcpy run past the buffer, so
+    // check the sign before the cast.
+    if (F->_w > 0 && (size_t)F->_w >= l) {
+        memcpy(F->_p, s, l);
+        F->_p += l;
+        F->_w -= l;
+        return l;
+    } else {
+        return fwrite(s, 1, l, F);
+    }
+}
+
+// FreeBSD fast path: reads l bytes into s and returns the number of bytes
+// read. Copies straight out of the FILE's buffer when the read counter _r
+// covers l, otherwise falls back to fread().
+inline size_t fsget(FILE* F, char* s, size_t l) {
+ if ((size_t)F->_r >= l) {
+ memcpy(s, F->_p, l);
+ F->_p += l;
+ F->_r -= l;
+ return l;
+ } else {
+ return fread(s, 1, l, F);
+ }
+}
+#else
+// Portable version: writes the raw bytes of a as one item with fwrite() and
+// returns its result (1 on success, 0 on failure).
+template <class T>
+Y_FORCE_INLINE size_t fput(FILE* F, const T& a) {
+ return fwrite(&a, sizeof(a), 1, F);
+}
+
+// Portable version: reads sizeof(a) raw bytes into a as one item with fread()
+// and returns its result (1 on success, 0 on a short read or failure).
+template <class T>
+Y_FORCE_INLINE size_t fget(FILE* F, T& a) {
+ return fread(&a, sizeof(a), 1, F);
+}
+
+// Portable version: writes l bytes from s with fwrite() and returns the number
+// of bytes written. Under WITH_VALGRIND the source range is first checked for
+// readability.
+inline size_t fsput(FILE* F, const char* s, size_t l) {
+#ifdef WITH_VALGRIND
+ VALGRIND_CHECK_READABLE(s, l);
+#endif
+ return fwrite(s, 1, l, F);
+}
+
+// Portable version: reads up to l bytes into s with fread() and returns the
+// number of bytes read.
+inline size_t fsget(FILE* F, char* s, size_t l) {
+ return fread(s, 1, l, F);
+}
+#endif
diff --git a/library/cpp/deprecated/fgood/ya.make b/library/cpp/deprecated/fgood/ya.make
new file mode 100644
index 0000000000..2394f9ad7a
--- /dev/null
+++ b/library/cpp/deprecated/fgood/ya.make
@@ -0,0 +1,8 @@
+LIBRARY()
+
+SRCS(
+ ffb.cpp
+ fgood.cpp
+)
+
+END()
diff --git a/library/cpp/deprecated/mapped_file/CMakeLists.darwin-x86_64.txt b/library/cpp/deprecated/mapped_file/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..a00407491d
--- /dev/null
+++ b/library/cpp/deprecated/mapped_file/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-deprecated-mapped_file)
+target_link_libraries(cpp-deprecated-mapped_file PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+)
+target_sources(cpp-deprecated-mapped_file PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/mapped_file/mapped_file.cpp
+)
diff --git a/library/cpp/deprecated/mapped_file/CMakeLists.linux-aarch64.txt b/library/cpp/deprecated/mapped_file/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..2bb5db017b
--- /dev/null
+++ b/library/cpp/deprecated/mapped_file/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,18 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-deprecated-mapped_file)
+target_link_libraries(cpp-deprecated-mapped_file PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+)
+target_sources(cpp-deprecated-mapped_file PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/mapped_file/mapped_file.cpp
+)
diff --git a/library/cpp/deprecated/mapped_file/CMakeLists.linux-x86_64.txt b/library/cpp/deprecated/mapped_file/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..2bb5db017b
--- /dev/null
+++ b/library/cpp/deprecated/mapped_file/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,18 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-deprecated-mapped_file)
+target_link_libraries(cpp-deprecated-mapped_file PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+)
+target_sources(cpp-deprecated-mapped_file PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/mapped_file/mapped_file.cpp
+)
diff --git a/library/cpp/deprecated/mapped_file/CMakeLists.txt b/library/cpp/deprecated/mapped_file/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/library/cpp/deprecated/mapped_file/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/library/cpp/deprecated/mapped_file/CMakeLists.windows-x86_64.txt b/library/cpp/deprecated/mapped_file/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..a00407491d
--- /dev/null
+++ b/library/cpp/deprecated/mapped_file/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-deprecated-mapped_file)
+target_link_libraries(cpp-deprecated-mapped_file PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+)
+target_sources(cpp-deprecated-mapped_file PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/mapped_file/mapped_file.cpp
+)
diff --git a/library/cpp/deprecated/mapped_file/mapped_file.cpp b/library/cpp/deprecated/mapped_file/mapped_file.cpp
new file mode 100644
index 0000000000..b0e4511299
--- /dev/null
+++ b/library/cpp/deprecated/mapped_file/mapped_file.cpp
@@ -0,0 +1,64 @@
+#include "mapped_file.h"
+
+#include <util/generic/yexception.h>
+#include <util/system/defaults.h>
+#include <util/system/hi_lo.h>
+#include <util/system/filemap.h>
+
+// Stores map in Map_ and maps the whole file, from offset 0 to Length().
+// dbgName is used only in the error message. Throws yexception when the file
+// length needs more than 32 bits but size_t is at most 32 bits wide.
+TMappedFile::TMappedFile(TFileMap* map, const char* dbgName) {
+    Map_ = map;
+    const i64 length = Map_->Length();
+    const bool narrowSizeT = sizeof(size_t) <= sizeof(ui32);
+    if (narrowSizeT && Hi32(length) != 0) {
+        ythrow yexception() << "File '" << dbgName << "' mapping error: " << length << " too large";
+    }
+
+    Map_->Map(0, static_cast<size_t>(length));
+}
+
+// Builds a mapping over an already opened file: Map_ starts as nullptr and the
+// actual work is delegated to init(file, om, dbgName).
+TMappedFile::TMappedFile(const TFile& file, TFileMap::EOpenMode om, const char* dbgName)
+ : Map_(nullptr)
+{
+ init(file, om, dbgName);
+}
+
+// Forwards to TFileMap::Precharge(off, size) for the current mapping.
+// Does nothing when no mapping is attached.
+void TMappedFile::precharge(size_t off, size_t size) const {
+    if (Map_) {
+        Map_->Precharge(off, size);
+    }
+}
+
+// (Re)initialises this object with a mapping of the file called name.
+void TMappedFile::init(const TString& name) {
+    // The holder owns the TFileMap while the TMappedFile constructor may still
+    // throw; once construction succeeded the holder lets go of it.
+    THolder<TFileMap> holder(new TFileMap(name));
+    TMappedFile fresh(holder.Get(), name.data());
+    Y_UNUSED(holder.Release());
+    // Install the new state here; the previous state ends up in fresh and is
+    // passed to term() (defined elsewhere).
+    fresh.swap(*this);
+    fresh.term();
+}
+
+// (Re)initialises this object with a mapping of the file called name; length
+// and om are passed through to the TFileMap constructor.
+void TMappedFile::init(const TString& name, size_t length, TFileMap::EOpenMode om) {
+    // The holder owns the TFileMap while the TMappedFile constructor may still
+    // throw; once construction succeeded the holder lets go of it.
+    THolder<TFileMap> holder(new TFileMap(name, length, om));
+    TMappedFile fresh(holder.Get(), name.data());
+    Y_UNUSED(holder.Release());
+    // Install the new state here; the previous state ends up in fresh and is
+    // passed to term() (defined elsewhere).
+    fresh.swap(*this);
+    fresh.term();
+}
+
+// (Re)initialises this object with a mapping of an already opened file.
+// dbgName is used only in error messages.
+void TMappedFile::init(const TFile& file, TFileMap::EOpenMode om, const char* dbgName) {
+    // The holder owns the TFileMap while the TMappedFile constructor may still
+    // throw; once construction succeeded the holder lets go of it.
+    THolder<TFileMap> holder(new TFileMap(file, om));
+    TMappedFile fresh(holder.Get(), dbgName);
+    Y_UNUSED(holder.Release());
+    // Install the new state here; the previous state ends up in fresh and is
+    // passed to term() (defined elsewhere).
+    fresh.swap(*this);
+    fresh.term();
+}
+
+// (Re)initialises this object with a mapping of the file called name, opened
+// with mode om.
+void TMappedFile::init(const TString& name, TFileMap::EOpenMode om) {
+    // The holder owns the TFileMap while the TMappedFile constructor may still
+    // throw; once construction succeeded the holder lets go of it.
+    THolder<TFileMap> holder(new TFileMap(name, om));
+    TMappedFile fresh(holder.Get(), name.data());
+    Y_UNUSED(holder.Release());
+    // Install the new state here; the previous state ends up in fresh and is
+    // passed to term() (defined elsewhere).
+    fresh.swap(*this);
+    fresh.term();
+}
+
+// Flushes the current mapping through TFileMap::Flush().
+// Does nothing when no mapping is attached, mirroring precharge(): a
+// TMappedFile can exist with Map_ == nullptr, and dereferencing it here would
+// crash.
+void TMappedFile::flush() {
+    if (!Map_)
+        return;
+
+    Map_->Flush();
+}
diff --git a/library/cpp/deprecated/mapped_file/ya.make b/library/cpp/deprecated/mapped_file/ya.make
new file mode 100644
index 0000000000..309341f1da
--- /dev/null
+++ b/library/cpp/deprecated/mapped_file/ya.make
@@ -0,0 +1,7 @@
+LIBRARY()
+
+SRCS(
+ mapped_file.cpp
+)
+
+END()
diff --git a/library/cpp/geo/CMakeLists.darwin-x86_64.txt b/library/cpp/geo/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..87e48b4a71
--- /dev/null
+++ b/library/cpp/geo/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,24 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(library-cpp-geo)
+target_link_libraries(library-cpp-geo PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+)
+target_sources(library-cpp-geo PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/bbox.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/geo.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/point.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/polygon.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/load_save_helper.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/size.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/util.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/window.cpp
+)
diff --git a/library/cpp/geo/CMakeLists.linux-aarch64.txt b/library/cpp/geo/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..cdad35989a
--- /dev/null
+++ b/library/cpp/geo/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,25 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(library-cpp-geo)
+target_link_libraries(library-cpp-geo PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+)
+target_sources(library-cpp-geo PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/bbox.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/geo.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/point.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/polygon.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/load_save_helper.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/size.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/util.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/window.cpp
+)
diff --git a/library/cpp/geo/CMakeLists.linux-x86_64.txt b/library/cpp/geo/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..cdad35989a
--- /dev/null
+++ b/library/cpp/geo/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,25 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(library-cpp-geo)
+target_link_libraries(library-cpp-geo PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+)
+target_sources(library-cpp-geo PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/bbox.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/geo.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/point.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/polygon.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/load_save_helper.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/size.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/util.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/window.cpp
+)
diff --git a/library/cpp/geo/CMakeLists.txt b/library/cpp/geo/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/library/cpp/geo/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/library/cpp/geo/CMakeLists.windows-x86_64.txt b/library/cpp/geo/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..87e48b4a71
--- /dev/null
+++ b/library/cpp/geo/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,24 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(library-cpp-geo)
+target_link_libraries(library-cpp-geo PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+)
+target_sources(library-cpp-geo PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/bbox.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/geo.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/point.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/polygon.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/load_save_helper.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/size.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/util.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/geo/window.cpp
+)
diff --git a/library/cpp/geo/bbox.cpp b/library/cpp/geo/bbox.cpp
new file mode 100644
index 0000000000..aa4258ac22
--- /dev/null
+++ b/library/cpp/geo/bbox.cpp
@@ -0,0 +1 @@
+#include "bbox.h"
diff --git a/library/cpp/geo/bbox.h b/library/cpp/geo/bbox.h
new file mode 100644
index 0000000000..7ec7e6f7d6
--- /dev/null
+++ b/library/cpp/geo/bbox.h
@@ -0,0 +1,59 @@
+#pragma once
+
+#include <util/generic/utility.h>
+
+#include "point.h"
+
+namespace NGeo {
+
+    // Axis-aligned box in longitude/latitude space: X bounds hold longitudes,
+    // Y bounds hold latitudes.
+    class TGeoBoundingBox {
+    public:
+        // Leaves all four bounds at quiet NaN (see the member initialisers).
+        TGeoBoundingBox() = default;
+
+        // Box spanned by two corner points given in any order.
+        TGeoBoundingBox(const TGeoPoint& p1, const TGeoPoint& p2)
+            : MinX_(Min(p1.Lon(), p2.Lon()))
+            , MaxX_(Max(p1.Lon(), p2.Lon()))
+            , MinY_(Min(p1.Lat(), p2.Lat()))
+            , MaxY_(Max(p1.Lat(), p2.Lat()))
+        {
+        }
+
+        // Smallest longitude.
+        const double& GetMinX() const {
+            return MinX_;
+        }
+
+        // Largest longitude.
+        const double& GetMaxX() const {
+            return MaxX_;
+        }
+
+        // Smallest latitude.
+        const double& GetMinY() const {
+            return MinY_;
+        }
+
+        // Largest latitude.
+        const double& GetMaxY() const {
+            return MaxY_;
+        }
+
+        // Longitude extent.
+        double Width() const {
+            return MaxX_ - MinX_;
+        }
+
+        // Latitude extent.
+        double Height() const {
+            return MaxY_ - MinY_;
+        }
+
+    private:
+        double MinX_{std::numeric_limits<double>::quiet_NaN()};
+        double MaxX_{std::numeric_limits<double>::quiet_NaN()};
+        double MinY_{std::numeric_limits<double>::quiet_NaN()};
+        double MaxY_{std::numeric_limits<double>::quiet_NaN()};
+    };
+
+    // Exact comparison of all four bounds. Because NaN never equals NaN, two
+    // default-constructed boxes compare unequal.
+    inline bool operator==(const TGeoBoundingBox& a, const TGeoBoundingBox& b) {
+        const bool sameMin = a.GetMinX() == b.GetMinX() && a.GetMinY() == b.GetMinY();
+        const bool sameMax = a.GetMaxX() == b.GetMaxX() && a.GetMaxY() == b.GetMaxY();
+        return sameMin && sameMax;
+    }
+} // namespace NGeo
diff --git a/library/cpp/geo/geo.cpp b/library/cpp/geo/geo.cpp
new file mode 100644
index 0000000000..37adc5c62c
--- /dev/null
+++ b/library/cpp/geo/geo.cpp
@@ -0,0 +1 @@
+#include "geo.h"
diff --git a/library/cpp/geo/geo.h b/library/cpp/geo/geo.h
new file mode 100644
index 0000000000..1aebacab5c
--- /dev/null
+++ b/library/cpp/geo/geo.h
@@ -0,0 +1,8 @@
+#pragma once
+
+#include "bbox.h"
+#include "point.h"
+#include "polygon.h"
+#include "size.h"
+#include "util.h"
+#include "window.h"
diff --git a/library/cpp/geo/load_save_helper.cpp b/library/cpp/geo/load_save_helper.cpp
new file mode 100644
index 0000000000..13fa7ac6df
--- /dev/null
+++ b/library/cpp/geo/load_save_helper.cpp
@@ -0,0 +1,49 @@
+#include "load_save_helper.h"
+#include <util/stream/input.h>
+
+// Writes a point as two doubles: longitude first, then latitude.
+void TSerializer<NGeo::TGeoPoint>::Save(IOutputStream* out, const NGeo::TGeoPoint& point) {
+    const double longitude = point.Lon();
+    const double latitude = point.Lat();
+    ::Save(out, longitude);
+    ::Save(out, latitude);
+}
+
+// Reads two doubles (longitude, then latitude) and assigns the resulting
+// point. Both locals start as quiet NaN before ::Load fills them in.
+void TSerializer<NGeo::TGeoPoint>::Load(IInputStream* in, NGeo::TGeoPoint& point) {
+    constexpr double nan = std::numeric_limits<double>::quiet_NaN();
+    double longitude = nan;
+    double latitude = nan;
+    ::Load(in, longitude);
+    ::Load(in, latitude);
+    point = {longitude, latitude};
+}
+
+// Writes a window as its centre point followed by its size.
+void TSerializer<NGeo::TGeoWindow>::Save(IOutputStream* out, const NGeo::TGeoWindow& window) {
+    ::Save(out, window.GetCenter());
+    ::Save(out, window.GetSize());
+}
+
+// Reads a centre point followed by a size and assigns the window built from
+// them.
+void TSerializer<NGeo::TGeoWindow>::Load(IInputStream* in, NGeo::TGeoWindow& window) {
+    NGeo::TGeoPoint loadedCenter{};
+    NGeo::TSize loadedSize{};
+
+    // Stream order is centre first, size second.
+    ::Load(in, loadedCenter);
+    ::Load(in, loadedSize);
+
+    window = {loadedCenter, loadedSize};
+}
+
+// Writes a size as two doubles: width first, then height.
+void TSerializer<NGeo::TSize>::Save(IOutputStream* out, const NGeo::TSize& size) {
+    const double w = size.GetWidth();
+    const double h = size.GetHeight();
+    ::Save(out, w);
+    ::Save(out, h);
+}
+
+// Reads two doubles (width, then height) and assigns the resulting size.
+// Both locals start as quiet NaN before ::Load fills them in.
+void TSerializer<NGeo::TSize>::Load(IInputStream* in, NGeo::TSize& size) {
+    constexpr double nan = std::numeric_limits<double>::quiet_NaN();
+    double w = nan;
+    double h = nan;
+    ::Load(in, w);
+    ::Load(in, h);
+    size = {w, h};
+}
diff --git a/library/cpp/geo/load_save_helper.h b/library/cpp/geo/load_save_helper.h
new file mode 100644
index 0000000000..4a5fceea18
--- /dev/null
+++ b/library/cpp/geo/load_save_helper.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <library/cpp/geo/window.h>
+#include <util/stream/input.h>
+#include <util/ysaveload.h>
+
+// Binary (de)serialisation of NGeo::TGeoPoint for ::Save / ::Load; the
+// implementation in load_save_helper.cpp stores longitude, then latitude, as
+// doubles.
+template <>
+struct TSerializer<NGeo::TGeoPoint> {
+ static void Save(IOutputStream*, const NGeo::TGeoPoint&);
+ static void Load(IInputStream*, NGeo::TGeoPoint&);
+};
+
+// Binary (de)serialisation of NGeo::TGeoWindow for ::Save / ::Load; the
+// implementation in load_save_helper.cpp stores the centre point, then the
+// size.
+template <>
+struct TSerializer<NGeo::TGeoWindow> {
+ static void Save(IOutputStream*, const NGeo::TGeoWindow&);
+ static void Load(IInputStream*, NGeo::TGeoWindow&);
+};
+
+// Binary (de)serialisation of NGeo::TSize for ::Save / ::Load; the
+// implementation in load_save_helper.cpp stores width, then height, as
+// doubles.
+template <>
+struct TSerializer<NGeo::TSize> {
+ static void Save(IOutputStream*, const NGeo::TSize&);
+ static void Load(IInputStream*, NGeo::TSize&);
+};
diff --git a/library/cpp/geo/point.cpp b/library/cpp/geo/point.cpp
new file mode 100644
index 0000000000..1d227c967f
--- /dev/null
+++ b/library/cpp/geo/point.cpp
@@ -0,0 +1,146 @@
+#include "point.h"
+#include "util.h"
+
+#include <util/generic/ylimits.h>
+#include <util/generic/ymath.h>
+
+#include <cstdlib>
+#include <utility>
+
+namespace NGeo {
+ namespace {
+     // Accepts a point only when its longitude lies strictly inside the longitude range widened
+     // by WORLD_WIDTH on both sides and its latitude lies strictly between the latitude limits.
+     bool IsNonDegeneratePoint(double lon, double lat) {
+         const bool lonInRange = MIN_LONGITUDE - WORLD_WIDTH < lon && lon < MAX_LONGITUDE + WORLD_WIDTH;
+         const bool latInRange = MIN_LATITUDE < lat && lat < MAX_LATITUDE;
+         return lonInRange && latInRange;
+     }
+ } // namespace
+
+ // Distance between this point and `p`, combining the longitude difference (passed through
+ // GetWidthAtEquator at the mean latitude of the two points) with the latitude difference.
+ float TGeoPoint::Distance(const TGeoPoint& p) const noexcept {
+     const auto delta = p - (*this);
+     const auto meanLat = (Lat_ + p.Lat()) * 0.5;
+     const auto widthAtEquator = GetWidthAtEquator(delta.GetWidth(), meanLat);
+     return sqrtf(Sqr(widthAtEquator) + Sqr(delta.GetHeight()));
+ }
+
+ // True when the latitude is at or beyond either latitude limit.
+ bool TGeoPoint::IsPole() const noexcept {
+     const bool atOrBelowMin = Lat_ <= MIN_LATITUDE;
+     const bool atOrAboveMax = MAX_LATITUDE <= Lat_;
+     return atOrBelowMin || atOrAboveMax;
+ }
+
+ // True when the latitude lies within [-VISIBLE_LATITUDE_BOUND, VISIBLE_LATITUDE_BOUND], bounds included.
+ bool TGeoPoint::IsVisibleOnMap() const noexcept {
+     const bool notTooFarSouth = -VISIBLE_LATITUDE_BOUND <= Lat_;
+     const bool notTooFarNorth = Lat_ <= VISIBLE_LATITUDE_BOUND;
+     return notTooFarSouth && notTooFarNorth;
+ }
+
+ // Parses "<lon><delimiter><lat>" via PairFromString and throws TBadCastException
+ // when the parsed coordinates fail IsNonDegeneratePoint.
+ TGeoPoint TGeoPoint::Parse(TStringBuf s, TStringBuf delimiter) {
+     const auto parsed = PairFromString(s, delimiter);
+     const double lon = parsed.first;
+     const double lat = parsed.second;
+     Y_ENSURE_EX(IsNonDegeneratePoint(lon, lat), TBadCastException() << "Invalid point: (" << lon << ", " << lat << ")");
+     return TGeoPoint(lon, lat);
+ }
+
+ // Non-throwing counterpart of Parse: returns an empty TMaybe when the string cannot be
+ // split into two numbers or when the coordinates fail IsNonDegeneratePoint.
+ TMaybe<TGeoPoint> TGeoPoint::TryParse(TStringBuf s, TStringBuf delimiter) {
+     std::pair<double, double> coords;
+     const bool accepted = TryPairFromString(coords, s, delimiter) &&
+                           IsNonDegeneratePoint(coords.first, coords.second);
+     if (accepted) {
+         return TGeoPoint(coords.first, coords.second);
+     }
+     return {};
+ }
+
+ // Component-wise difference p1 - p2: width is the longitude delta, height the latitude delta.
+ TSize operator-(const TGeoPoint& p1, const TGeoPoint& p2) {
+     const double lonDelta = p1.Lon() - p2.Lon();
+     const double latDelta = p1.Lat() - p2.Lat();
+     return {lonDelta, latDelta};
+ }
+
+ /*
+ Conversion code was imported from http://wiki.yandex-team.ru/YandexMobile/maps/Algorithm/mapengine/coordtransforms
+ */
+ namespace WGS84 {
+ /* Isometric to geodetic latitude parameters, default to WGS 84 */
+ // Used by MercatorToLL as the coefficients of sin(2x), sin(4x), sin(6x) and sin(8x) respectively.
+ const double ab = 0.00335655146887969400;
+ const double bb = 0.00000657187271079536;
+ const double cb = 0.00000001764564338702;
+ const double db = 0.00000000005328478445;
+
+ // Values derived from the ellipsoid radius R:
+ // _a is R itself, _f is the constant 1/298.257223563, _b = _a * (1 - _f),
+ // _e = sqrt(1 - (_b / _a)^2), _e2 = _e^2, _g = sqrt(1 - _e2), _gR2 = 2 * _g * R.
+ // NOTE(review): presumably semi-major axis, flattening, semi-minor axis and first eccentricity — confirm.
+ const double _a = R;
+ const double _f = 1.0 / 298.257223563;
+ const double _b = _a - _f * _a;
+ const double _e = sqrt(1 - pow(_b / _a, 2));
+ const double _e2 = _e * _e;
+ const double _g = sqrt(1.0 - _e2);
+ const double _gR2 = _g * R * 2.0;
+ } // namespace WGS84
+
+ // Converts a Mercator point to longitude/latitude in degrees.
+ // No validation of the input is performed (the check below is commented out).
+ TGeoPoint MercatorToLL(TMercatorPoint pt) {
+ using namespace WGS84;
+
+ // Y_ENSURE(pt.IsDefined(), "Point is not defined");
+
+ /* Isometric latitude*/
+ const double xphi = PI / 2.0 - 2.0 * atan(exp(-pt.Y_ / R));
+
+ // Latitude (radians) is xphi plus a four-term sine series with the WGS84 coefficients ab..db;
+ // longitude (radians) is simply X divided by R.
+ double latitude = xphi + ab * sin(2.0 * xphi) + bb * sin(4.0 * xphi) + cb * sin(6.0 * xphi) + db * sin(8.0 * xphi);
+ double longitude = pt.X_ / R;
+
+ return TGeoPoint{Rad2deg(longitude), Rad2deg(latitude)};
+ }
+
+ // Computes the Mercator Y coordinate for the latitude of `ll`.
+ // Three latitudes are special-cased: exactly 0 yields 0, MIN_LATITUDE yields -infinity
+ // and MAX_LATITUDE yields +infinity. Any other latitude goes through the formula below
+ // without range checking.
+ double GetMercatorY(const TGeoPoint& ll) {
+ if (Y_UNLIKELY(ll.Lat() == 0.)) {
+ // shortcut for common case, avoiding floating point errors
+ return 0.;
+ }
+ if (Y_UNLIKELY(ll.Lat() == MIN_LATITUDE)) {
+ return -std::numeric_limits<double>::infinity();
+ }
+ if (Y_UNLIKELY(ll.Lat() == MAX_LATITUDE)) {
+ return +std::numeric_limits<double>::infinity();
+ }
+ double lat = Deg2rad(ll.Lat());
+ double esinLat = WGS84::_e * sin(lat);
+
+ // U = tan(pi/4 + lat/2) / tan(pi/4 + asin(e * sin(lat)) / 2)^e; the result is R * ln(U).
+ double tan_temp = tan(PI / 4.e0 + lat / 2.e0);
+ double pow_temp = pow(tan(PI / 4.e0 + asin(esinLat) / 2), WGS84::_e);
+ double U = tan_temp / pow_temp;
+ return WGS84::R * log(U);
+ }
+
+ // Converts a longitude/latitude point (degrees) to Mercator coordinates.
+ // X is the longitude in radians scaled by WGS84::R; Y comes from GetMercatorY.
+ // The input is deliberately not validated: neither the point's validity nor its latitude range is checked.
+ TMercatorPoint LLToMercator(TGeoPoint ll) {
+     const double lonRad = Deg2rad(ll.Lon());
+     const double mercatorX = WGS84::R * lonRad;
+     const double mercatorY = GetMercatorY(ll);
+     return TMercatorPoint{mercatorX, mercatorY};
+ }
+
+ // Distance between two longitude/latitude points (degrees) on the WGS84 constants above.
+ // All angles are handled as half-angles in radians; the result is scaled by _gR2 / (1 - _e2 * s^2).
+ // NOTE(review): the result appears to be in the units of WGS84::R (metres) — confirm.
+ double GeodeticDistance(TGeoPoint p1, TGeoPoint p2) {
+ using namespace WGS84;
+
+ // Multiplying degrees by PI / 360 gives half of the angle in radians.
+ constexpr double deg2HalfRad = PI / 360.0;
+
+ const double lon1Half = p1.Lon() * deg2HalfRad;
+ const double lon2Half = p2.Lon() * deg2HalfRad;
+
+ const double lat1Half = p1.Lat() * deg2HalfRad;
+ const double lat2Half = p2.Lat() * deg2HalfRad;
+
+ const double diffLatHalf = fabs(lat1Half - lat2Half);
+ const double diffLonHalf = fabs(lon1Half - lon2Half);
+
+ // Points whose half-angle differences are both below 0.5e-8 rad are treated as identical.
+ if (diffLatHalf < 0.5e-8 && diffLonHalf < 0.5e-8) {
+ return 0;
+ }
+
+ double s = sin(lat1Half + lat2Half);
+ double s2 = s * s;
+ double m = _gR2 / (1.0 - _e2 * s2);
+
+ const double w = sin(diffLatHalf);
+ const double w2 = w * w;
+ // Max(..., 0.0) keeps cc from going negative.
+ const double cc = Max(1.0 - s2 - w2, 0.0); // cos(lat1Half * 2) * cos(lat2Half * 2)
+ const double z = sin(diffLonHalf);
+
+ return m * asin(sqrt(w2 + cc * z * z));
+ }
+} // namespace NGeo
diff --git a/library/cpp/geo/point.h b/library/cpp/geo/point.h
new file mode 100644
index 0000000000..70c91ab2dd
--- /dev/null
+++ b/library/cpp/geo/point.h
@@ -0,0 +1,198 @@
+#pragma once
+
+#include <util/generic/string.h>
+#include <util/stream/output.h>
+#include <util/string/cast.h>
+#include <util/generic/maybe.h>
+
+#include <algorithm>
+#include <cmath>
+
+namespace NGeo {
+ class TSize;
+
+ /// A point given by longitude and latitude.
+ /// A default-constructed point holds the BadX/BadY sentinels and reports IsValid() == false.
+ class TGeoPoint {
+ public:
+ /// Stores the coordinates as given; no range validation is done here.
+ TGeoPoint(double lon, double lat) noexcept
+ : Lon_(lon)
+ , Lat_(lat)
+ {
+ }
+
+ /// Constructs an invalid point (both coordinates set to their sentinel values).
+ TGeoPoint() noexcept
+ : Lon_(BadX)
+ , Lat_(BadY)
+ {
+ }
+
+ double Lon() const noexcept {
+ return Lon_;
+ }
+
+ double Lat() const noexcept {
+ return Lat_;
+ }
+
+ float Distance(const TGeoPoint& p) const noexcept;
+
+ void swap(TGeoPoint& p) noexcept {
+ std::swap(Lon_, p.Lon_);
+ std::swap(Lat_, p.Lat_);
+ }
+
+ /// True only when neither coordinate equals its sentinel (BadX for longitude, BadY for latitude).
+ bool IsValid() const {
+ return (Lon_ != BadX) && (Lat_ != BadY);
+ }
+
+ /// Returns true if the point represents either North or South Pole
+ bool IsPole() const noexcept;
+
+ /// Returns true if the point may be shown on the Yandex Map (fits into the valid range of latitudes)
+ bool IsVisibleOnMap() const noexcept;
+
+ bool operator!() const {
+ return !IsValid();
+ }
+
+ /// Same as ToString() with the default "," delimiter.
+ TString ToCgiStr() const {
+ return ToString();
+ }
+
+ /// Formats the point as "<lon><delimiter><lat>".
+ TString ToString(const char* delimiter = ",") const {
+ return TString::Join(::ToString(Lon_), delimiter, ::ToString(Lat_));
+ }
+
+ /**
+ * \note Parsing functions work in a safe way. They discard invalid points:
+ * 1) on the Poles and 'beyond' the Poles;
+ * 2) not belonging to the 'main' world and +/-1 world to the left or to the right.
+ * If you need such cases, construct the TGeoPoint manually.
+ */
+
+ /// Throws TBadCastException on error
+ static TGeoPoint Parse(TStringBuf s, TStringBuf delimiter = TStringBuf(","));
+
+ /// Returns Nothing() on error
+ static TMaybe<TGeoPoint> TryParse(TStringBuf s, TStringBuf delimiter = TStringBuf(","));
+
+ private:
+ double Lon_;
+ double Lat_;
+
+ // Sentinel coordinates used by the default constructor and checked by IsValid().
+ static constexpr double BadX{361.};
+ static constexpr double BadY{181.};
+ };
+
+ /// Distance between two longitude/latitude points given in degrees.
+ /// NOTE(review): the unit of the result is presumably that of WGS84::R — confirm.
+ double GeodeticDistance(TGeoPoint p1, TGeoPoint p2);
+
+ /**
+ * \class TMercatorPoint
+ *
+ * Represents a point in EPSG:3395 projection
+ * (WGS 84 / World Mercator)
+ */
+ class TMercatorPoint {
+ public:
+ // Friends are granted access to the private members of this class.
+ friend class TMercatorWindow;
+ friend TGeoPoint MercatorToLL(TMercatorPoint);
+
+ /**
+ * Constructs a point with the given coordinates.
+ */
+ constexpr TMercatorPoint(double x, double y) noexcept
+ : X_{x}
+ , Y_{y}
+ {
+ }
+
+ /**
+ * Constructs a point with two NaN coordinates.
+ *
+ * Should not be called directly.
+ * If your `point` variable might be undefined,
+ * declare it explicitly as TMaybe<TMercatorPoint>.
+ */
+ constexpr TMercatorPoint() noexcept
+ : X_{std::numeric_limits<double>::quiet_NaN()}
+ , Y_{std::numeric_limits<double>::quiet_NaN()}
+ {
+ }
+
+ /**
+ * Returns the X_ coordinate.
+ *
+ * The line X_ == 0 corresponds to the Prime meridian.
+ */
+ constexpr double X() const noexcept {
+ return X_;
+ }
+
+ /**
+ * Returns the Y_ coordinate.
+ *
+ * The line Y_ == 0 corresponds to the Equator.
+ */
+ constexpr double Y() const noexcept {
+ return Y_;
+ }
+
+ private:
+ /**
+ * Returns true when neither coordinate is NaN.
+ */
+ bool IsDefined() const noexcept {
+ return !std::isnan(X_) && !std::isnan(Y_);
+ }
+
+ private:
+ double X_;
+ double Y_;
+ };
+
+ /**
+ * Operators
+ */
+
+ /// Exact (bitwise-value) comparison of both coordinates; no tolerance is applied.
+ inline bool operator==(const TGeoPoint& p1, const TGeoPoint& p2) {
+     const bool sameLon = p1.Lon() == p2.Lon();
+     const bool sameLat = p1.Lat() == p2.Lat();
+     return sameLon && sameLat;
+ }
+
+ /// Exact comparison of both coordinates; no tolerance is applied.
+ inline bool operator==(const TMercatorPoint& p1, const TMercatorPoint& p2) {
+     const bool sameX = p1.X() == p2.X();
+     const bool sameY = p1.Y() == p2.Y();
+     return sameX && sameY;
+ }
+
+ /// Orders points by longitude first; latitude is compared only when the longitudes are equal.
+ inline bool operator<(const TGeoPoint& p1, const TGeoPoint& p2) {
+     const bool sameLon = p1.Lon() == p2.Lon();
+     return sameLon ? p1.Lat() < p2.Lat()
+                    : p1.Lon() < p2.Lon();
+ }
+
+ /**
+ * Conversion
+ */
+
+ namespace WGS84 {
+ /* Radius of reference ellipsoid, default to WGS 84 */
+ // NOTE(review): the value matches the WGS 84 equatorial radius in metres — confirm the unit.
+ const double R = 6378137.0;
+ } // namespace WGS84
+
+ using TPointLL = TGeoPoint;
+ using TPointXY = TMercatorPoint;
+
+ TGeoPoint MercatorToLL(TMercatorPoint);
+ TMercatorPoint LLToMercator(TGeoPoint);
+
+ /**
+ * Input/output
+ */
+
+ TSize operator-(const TGeoPoint& p1, const TGeoPoint& p2);
+} // namespace NGeo
+
+// Stream output for TGeoPoint, rendered as "[lon, lat]".
+template <>
+inline void Out<NGeo::TGeoPoint>(IOutputStream& o, const NGeo::TGeoPoint& p) {
+    o << '[';
+    o << p.Lon() << ", " << p.Lat();
+    o << ']';
+}
+
+// Stream output for TMercatorPoint, rendered as "[x, y]".
+template <>
+inline void Out<NGeo::TMercatorPoint>(IOutputStream& o, const NGeo::TMercatorPoint& p) {
+    o << '[';
+    o << p.X() << ", " << p.Y();
+    o << ']';
+}
diff --git a/library/cpp/geo/polygon.cpp b/library/cpp/geo/polygon.cpp
new file mode 100644
index 0000000000..44e5c38b5f
--- /dev/null
+++ b/library/cpp/geo/polygon.cpp
@@ -0,0 +1,28 @@
+#include "polygon.h"
+namespace NGeo {
+ // Splits `s` by `pointsDelimiter` (empty pieces are skipped) and parses every piece with
+ // TGeoPoint::TryParse using `llDelimiter`. Returns an empty TMaybe as soon as one piece
+ // fails to parse, or when fewer than three points were collected.
+ TMaybe<TGeoPolygon> TGeoPolygon::TryParse(TStringBuf s, TStringBuf llDelimiter, TStringBuf pointsDelimiter) {
+     TVector<TGeoPoint> vertices;
+
+     for (const auto& piece : StringSplitter(s).SplitByString(pointsDelimiter).SkipEmpty()) {
+         const auto vertex = TGeoPoint::TryParse(piece.Token(), llDelimiter);
+         if (!vertex) {
+             return {};
+         }
+         vertices.push_back(*vertex);
+     }
+
+     if (vertices.size() >= 3) {
+         return TGeoPolygon(vertices);
+     }
+     return {};
+ }
+
+ // Throwing counterpart of TryParse: returns the parsed polygon or throws when TryParse
+ // rejects the input. The exception is TBadCastException, the same type TGeoPoint::Parse
+ // and TSize::Parse throw; it derives from yexception, so handlers catching yexception
+ // keep working.
+ TGeoPolygon TGeoPolygon::Parse(TStringBuf s, TStringBuf llDelimiter, TStringBuf pointsDelimiter) {
+     auto res = TGeoPolygon::TryParse(s, llDelimiter, pointsDelimiter);
+     if (!res) {
+         ythrow TBadCastException() << "Can't parse polygon from input string: " << s;
+     }
+     return *res;
+ }
+} // namespace NGeo
diff --git a/library/cpp/geo/polygon.h b/library/cpp/geo/polygon.h
new file mode 100644
index 0000000000..1528345fec
--- /dev/null
+++ b/library/cpp/geo/polygon.h
@@ -0,0 +1,90 @@
+#pragma once
+
+#include "point.h"
+#include "window.h"
+
+#include <util/ysaveload.h>
+#include <util/generic/algorithm.h>
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+#include <util/generic/yexception.h>
+#include <util/stream/output.h>
+#include <util/string/cast.h>
+#include <util/string/join.h>
+#include <util/string/split.h>
+
+#include <algorithm>
+#include <functional>
+
+namespace NGeo {
+ /// A polygon stored as a list of vertices together with the bounding window of those vertices.
+ class TGeoPolygon {
+ private:
+     TVector<TGeoPoint> Points_;
+     TGeoWindow Window_;
+
+ public:
+     /// Creates an empty polygon: no points and a default-constructed window.
+     TGeoPolygon() = default;
+
+     /// Copies `points` and computes their bounding window.
+     /// Passing an empty vector leaves the window default-constructed, exactly as the
+     /// default constructor does.
+     explicit TGeoPolygon(const TVector<TGeoPoint>& points)
+         : Points_(points)
+     {
+         CalcWindow();
+     }
+
+     const TVector<TGeoPoint>& GetPoints() const {
+         return Points_;
+     }
+
+     const TGeoWindow& GetWindow() const {
+         return Window_;
+     }
+
+     void swap(TGeoPolygon& o) noexcept {
+         Points_.swap(o.Points_);
+         Window_.swap(o.Window_);
+     }
+
+     /// True when the polygon has at least one point and its window reports itself valid.
+     bool IsValid() const noexcept {
+         return !Points_.empty() && Window_.IsValid();
+     }
+
+     bool operator!() const {
+         return !IsValid();
+     }
+
+     /**
+      * try to parse TGeoPolygon from string which stores points
+      * coords are separated by llDelimiter, points are separated by pointsDelimiter
+      * return parsed TGeoPolygon on success, otherwise throw exception
+      */
+     static TGeoPolygon Parse(TStringBuf s, TStringBuf llDelimiter = ",", TStringBuf pointsDelimiter = TStringBuf(" "));
+
+     /**
+      * try to parse TGeoPolygon from string which stores points
+      * coords are separated by llDelimiter, points are separated by pointsDelimiter
+      * return TMaybe of parsed TGeoPolygon on success, otherwise return empty TMaybe
+      */
+     static TMaybe<TGeoPolygon> TryParse(TStringBuf s, TStringBuf llDelimiter = ",", TStringBuf pointsDelimiter = TStringBuf(" "));
+
+ private:
+     /// Recomputes Window_ as the window spanned by the minimum and maximum longitude/latitude
+     /// found among Points_.
+     void CalcWindow() {
+         if (Points_.empty()) {
+             // With no points the min/max searches below have nothing to return but the end
+             // iterator, and dereferencing it is undefined behaviour. Keep the window as is.
+             return;
+         }
+
+         auto getLon = std::mem_fn(&TGeoPoint::Lon);
+         double lowerX = MinElementBy(Points_.begin(), Points_.end(), getLon)->Lon();
+         double upperX = MaxElementBy(Points_.begin(), Points_.end(), getLon)->Lon();
+
+         auto getLat = std::mem_fn(&TGeoPoint::Lat);
+         double lowerY = MinElementBy(Points_.begin(), Points_.end(), getLat)->Lat();
+         double upperY = MaxElementBy(Points_.begin(), Points_.end(), getLat)->Lat();
+
+         Window_ = TGeoWindow{TGeoPoint{lowerX, lowerY}, TGeoPoint{upperX, upperY}};
+     }
+ };
+
+ /// Two polygons are equal when their point lists compare equal; the windows are not compared.
+ inline bool operator==(const TGeoPolygon& p1, const TGeoPolygon& p2) {
+     const TVector<TGeoPoint>& lhsPoints = p1.GetPoints();
+     const TVector<TGeoPoint>& rhsPoints = p2.GetPoints();
+     return lhsPoints == rhsPoints;
+ }
+
+ /// Negation of operator== for polygons.
+ inline bool operator!=(const TGeoPolygon& p1, const TGeoPolygon& p2) {
+     const bool equal = (p1 == p2);
+     return !equal;
+ }
+} // namespace NGeo
diff --git a/library/cpp/geo/size.cpp b/library/cpp/geo/size.cpp
new file mode 100644
index 0000000000..f1bd8ab763
--- /dev/null
+++ b/library/cpp/geo/size.cpp
@@ -0,0 +1,31 @@
+#include "size.h"
+
+#include "util.h"
+
+namespace NGeo {
+ // Out-of-class definitions of the TSize static constants, both set to -1.
+ const double TSize::BadWidth = -1.;
+ const double TSize::BadHeight = -1.;
+
+ namespace {
+     // True when both dimensions are greater than or equal to zero.
+     bool IsNonNegativeSize(double width, double height) {
+         const bool widthOk = width >= 0.;
+         const bool heightOk = height >= 0.;
+         return widthOk && heightOk;
+     }
+ } // namespace
+
+ // Parses "<width><delimiter><height>" via PairFromString and throws TBadCastException
+ // when either dimension is negative.
+ TSize TSize::Parse(TStringBuf s, TStringBuf delimiter) {
+     const auto parsed = PairFromString(s, delimiter);
+     const double width = parsed.first;
+     const double height = parsed.second;
+     Y_ENSURE_EX(IsNonNegativeSize(width, height), TBadCastException() << "Negative window size");
+     return TSize(width, height);
+ }
+
+ // Non-throwing counterpart of Parse: returns an empty TMaybe when the string cannot be
+ // split into two numbers or when either dimension is negative.
+ TMaybe<TSize> TSize::TryParse(TStringBuf s, TStringBuf delimiter) {
+     std::pair<double, double> widthHeight;
+     const bool accepted = TryPairFromString(widthHeight, s, delimiter) &&
+                           IsNonNegativeSize(widthHeight.first, widthHeight.second);
+     if (accepted) {
+         return TSize{widthHeight.first, widthHeight.second};
+     }
+     return {};
+ }
+} // namespace NGeo
diff --git a/library/cpp/geo/size.h b/library/cpp/geo/size.h
new file mode 100644
index 0000000000..b619c6d899
--- /dev/null
+++ b/library/cpp/geo/size.h
@@ -0,0 +1,93 @@
+#pragma once
+
+#include <util/generic/string.h>
+#include <util/stream/output.h>
+#include <util/string/cast.h>
+
+namespace NGeo {
+ /// Width/height pair. A default-constructed size holds the BadWidth/BadHeight sentinels
+ /// and reports IsValid() == false.
+ class TSize {
+ public:
+     /// Stores the dimensions as given; no validation is done here.
+     TSize(double width, double height) noexcept
+         : Width_(width)
+         , Height_(height)
+     {
+     }
+
+     /// Square size: the same value is used for both dimensions.
+     explicit TSize(double size) noexcept
+         : TSize(size, size)
+     {
+     }
+
+     /// Invalid size: both dimensions are set to their sentinel values.
+     TSize() noexcept
+         : TSize(BadWidth, BadHeight)
+     {
+     }
+
+     double GetWidth() const noexcept {
+         return Width_;
+     }
+
+     double GetHeight() const noexcept {
+         return Height_;
+     }
+
+     void swap(TSize& s) noexcept {
+         std::swap(Width_, s.Width_);
+         std::swap(Height_, s.Height_);
+     }
+
+     /// False as soon as either dimension equals its sentinel value.
+     bool IsValid() const {
+         return !(Width_ == BadWidth || Height_ == BadHeight);
+     }
+
+     /// Multiplies both dimensions by `multiplier`.
+     void Stretch(double multiplier) {
+         Width_ *= multiplier;
+         Height_ *= multiplier;
+     }
+
+     /// Adds `additionX` to the width and `additionY` to the height.
+     void Inflate(double additionX, double additionY) {
+         Width_ += additionX;
+         Height_ += additionY;
+     }
+
+     bool operator!() const {
+         return !IsValid();
+     }
+
+     /// Formats the size as "<width>,<height>".
+     TString ToCgiStr() const {
+         return TString::Join(ToString(Width_), ",", ToString(Height_));
+     }
+
+     /**
+      * try to parse TSize
+      * return parsed TSize on success, otherwise throw exception
+      */
+     static TSize Parse(TStringBuf s, TStringBuf delimiter = TStringBuf(","));
+
+     /**
+      * try to parse TSize
+      * return TMaybe of parsed TSize on success, otherwise return empty TMaybe
+      */
+     static TMaybe<TSize> TryParse(TStringBuf s, TStringBuf delimiter = TStringBuf(","));
+
+ private:
+     double Width_;
+     double Height_;
+     // Sentinels used by the default constructor and checked by IsValid().
+     static const double BadWidth;
+     static const double BadHeight;
+ };
+
+ /// Exact comparison of both dimensions; no tolerance is applied.
+ inline bool operator==(const TSize& p1, const TSize& p2) {
+     const bool sameHeight = p1.GetHeight() == p2.GetHeight();
+     const bool sameWidth = p1.GetWidth() == p2.GetWidth();
+     return sameHeight && sameWidth;
+ }
+} // namespace NGeo
+
+// Stream output for TSize, rendered as "<width, height>".
+template <>
+inline void Out<NGeo::TSize>(IOutputStream& o, const NGeo::TSize& s) {
+    o << '<';
+    o << s.GetWidth() << ", " << s.GetHeight();
+    o << '>';
+}
diff --git a/library/cpp/geo/style/ya.make b/library/cpp/geo/style/ya.make
new file mode 100644
index 0000000000..f72d50f27e
--- /dev/null
+++ b/library/cpp/geo/style/ya.make
@@ -0,0 +1,8 @@
+# Style-test module covering every .cpp and .h file under library/cpp/geo.
+# NOTE(review): the exact checks run by CPP_STYLE_TEST_14 are defined by the build system — confirm.
+CPP_STYLE_TEST_14()
+
+STYLE(
+ library/cpp/geo/**/*.cpp
+ library/cpp/geo/**/*.h
+)
+
+END()
diff --git a/library/cpp/geo/ut/load_save_helper_ut.cpp b/library/cpp/geo/ut/load_save_helper_ut.cpp
new file mode 100644
index 0000000000..f251f56630
--- /dev/null
+++ b/library/cpp/geo/ut/load_save_helper_ut.cpp
@@ -0,0 +1,90 @@
+#include "load_save_helper.h"
+#include "point.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+#include <util/stream/str.h>
+#include <util/ysaveload.h>
+
+namespace {
+ // Saving a point must produce the same bytes as saving its longitude and then its latitude as doubles.
+ void CheckSave(const NGeo::TGeoPoint& point) {
+ TStringStream output;
+ ::Save(&output, point);
+ TStringStream answer;
+ ::Save(&answer, static_cast<double>(point.Lon()));
+ ::Save(&answer, static_cast<double>(point.Lat()));
+ UNIT_ASSERT_EQUAL(output.Str(), answer.Str());
+ }
+
+ // Loading a point from a stream holding two saved doubles must yield those doubles as Lon/Lat.
+ void CheckLoad(const double x, const double y) {
+ TStringStream input;
+ ::Save(&input, x);
+ ::Save(&input, y);
+ NGeo::TGeoPoint output;
+ ::Load(&input, output);
+
+ const double eps = 1.E-8;
+ UNIT_ASSERT_DOUBLES_EQUAL(static_cast<double>(output.Lon()), x, eps);
+ UNIT_ASSERT_DOUBLES_EQUAL(static_cast<double>(output.Lat()), y, eps);
+ }
+
+ // Round trip for a point: Save followed by Load must give back the original coordinates.
+ void CheckLoadAfterSavePointLL(double x, double y) {
+ NGeo::TGeoPoint answer = {x, y};
+ TStringStream iostream;
+ ::Save(&iostream, answer);
+ NGeo::TGeoPoint output;
+ ::Load(&iostream, output);
+
+ const double eps = 1.E-8;
+ UNIT_ASSERT_DOUBLES_EQUAL(static_cast<double>(output.Lon()), x, eps);
+ UNIT_ASSERT_DOUBLES_EQUAL(static_cast<double>(output.Lat()), y, eps);
+ }
+
+ // Round trip for a window: Save followed by Load must give back the same center and size.
+ void CheckLoadAfterSaveWindowLL(NGeo::TGeoPoint center, NGeo::TSize size) {
+ NGeo::TGeoWindow answer = {center, size};
+ TStringStream iostream;
+ ::Save(&iostream, answer);
+ NGeo::TGeoWindow output;
+ ::Load(&iostream, output);
+ UNIT_ASSERT_EQUAL(output.GetCenter(), answer.GetCenter());
+ UNIT_ASSERT_EQUAL(output.GetSize(), answer.GetSize());
+ }
+} // namespace
+
+// Save/Load tests for TGeoPoint, run over all four sign combinations of one coordinate pair.
+Y_UNIT_TEST_SUITE(TSaveLoadForPointLL) {
+ Y_UNIT_TEST(TestSave) {
+ // {27.561481, 53.902496} Minsk Lon and Lat
+ CheckSave({27.561481, 53.902496});
+ CheckSave({-27.561481, 53.902496});
+ CheckSave({27.561481, -53.902496});
+ CheckSave({-27.561481, -53.902496});
+ }
+
+ Y_UNIT_TEST(TestLoad) {
+ CheckLoad(27.561481, 53.902496);
+ CheckLoad(-27.561481, 53.902496);
+ CheckLoad(27.561481, -53.902496);
+ CheckLoad(-27.561481, -53.902496);
+ }
+
+ Y_UNIT_TEST(TestSaveLoad) {
+ CheckLoadAfterSavePointLL(27.561481, 53.902496);
+ CheckLoadAfterSavePointLL(-27.561481, 53.902496);
+ CheckLoadAfterSavePointLL(27.561481, -53.902496);
+ CheckLoadAfterSavePointLL(-27.561481, -53.902496);
+ CheckLoadAfterSavePointLL(0, 0);
+ }
+}
+
+// Save/Load round-trip tests for TGeoWindow.
+Y_UNIT_TEST_SUITE(TSaveLoadForWindowLL) {
+ // Despite its name this test exercises the full Save-then-Load round trip.
+ Y_UNIT_TEST(TestSave) {
+ CheckLoadAfterSaveWindowLL({27.561481, 53.902496}, {1, 2});
+ CheckLoadAfterSaveWindowLL({27.561481, 53.902496}, {2, 1});
+ CheckLoadAfterSaveWindowLL({-27.561481, 53.902496}, {1, 2});
+ CheckLoadAfterSaveWindowLL({-27.561481, 53.902496}, {2, 1});
+ CheckLoadAfterSaveWindowLL({27.561481, -53.902496}, {1, 2});
+ CheckLoadAfterSaveWindowLL({27.561481, -53.902496}, {2, 1});
+ CheckLoadAfterSaveWindowLL({-27.561481, -53.902496}, {1, 2});
+ CheckLoadAfterSaveWindowLL({-27.561481, -53.902496}, {2, 1});
+ CheckLoadAfterSaveWindowLL({0, 0}, {0, 0});
+ }
+}
diff --git a/library/cpp/geo/ut/point_ut.cpp b/library/cpp/geo/ut/point_ut.cpp
new file mode 100644
index 0000000000..bbf8f32cea
--- /dev/null
+++ b/library/cpp/geo/ut/point_ut.cpp
@@ -0,0 +1,171 @@
+#include "point.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NGeo;
+
+namespace {
+ // Converts `input` with LLToMercator and compares both coordinates with `answer` within `eps`.
+ void CheckMercator(TGeoPoint input, TMercatorPoint answer, double eps = 1.e-8) {
+ auto output = LLToMercator(input);
+ UNIT_ASSERT_DOUBLES_EQUAL(output.X(), answer.X(), eps);
+ UNIT_ASSERT_DOUBLES_EQUAL(output.Y(), answer.Y(), eps);
+ }
+
+ // Converts `input` with MercatorToLL and compares both coordinates with `answer` within `eps`.
+ void CheckGeo(TMercatorPoint input, TGeoPoint answer, double eps = 1.e-8) {
+ auto output = MercatorToLL(input);
+ UNIT_ASSERT_DOUBLES_EQUAL(output.Lon(), answer.Lon(), eps);
+ UNIT_ASSERT_DOUBLES_EQUAL(output.Lat(), answer.Lat(), eps);
+ }
+} // namespace
+
+// Parsing tests for TGeoPoint::Parse / TGeoPoint::TryParse.
+Y_UNIT_TEST_SUITE(TPointTest) {
+ Y_UNIT_TEST(TestGeoPointFromString) {
+ // Well-formed inputs with the default "," delimiter and with a custom " " delimiter.
+ UNIT_ASSERT_EQUAL(TGeoPoint::Parse("0.15,0.67"),
+ TGeoPoint(0.15, 0.67));
+ UNIT_ASSERT_EQUAL(TGeoPoint::Parse("-52.,-27."),
+ TGeoPoint(-52., -27.));
+ UNIT_ASSERT_EQUAL(TGeoPoint::Parse("0.15 0.67", " "),
+ TGeoPoint(0.15, 0.67));
+ UNIT_ASSERT_EQUAL(TGeoPoint::Parse("-27. -52", " "),
+ TGeoPoint(-27., -52.));
+ // A longitude of 182 is still accepted.
+ UNIT_ASSERT_EQUAL(TGeoPoint::Parse("182,55"),
+ TGeoPoint(182., 55.));
+
+ // current behavior
+ // Rejected: empty input, non-numeric input, longitude 640, latitude 100,
+ // and whitespace after a number. Leading whitespace before a number is accepted.
+ UNIT_ASSERT(TGeoPoint::TryParse(TString{}).Empty());
+ UNIT_ASSERT_EXCEPTION(TGeoPoint::Parse("Hello,world"), TBadCastException);
+ UNIT_ASSERT_EXCEPTION(TGeoPoint::Parse("640 17", " "), TBadCastException);
+ UNIT_ASSERT_EXCEPTION(TGeoPoint::Parse("50.,100"), TBadCastException);
+ UNIT_ASSERT_EQUAL(TGeoPoint::Parse(" 0.01, 0.01"), TGeoPoint(0.01, 0.01));
+ UNIT_ASSERT_EXCEPTION(TGeoPoint::Parse("0.01 , 0.01"), TBadCastException);
+ UNIT_ASSERT_EXCEPTION(TGeoPoint::Parse("0.01, 0.01 "), TBadCastException);
+ }
+}
+
+// Tests for the LLToMercator / MercatorToLL conversions and for TGeoPoint::IsVisibleOnMap.
+Y_UNIT_TEST_SUITE(TConversionTest) {
+ Y_UNIT_TEST(TestConversionGeoToMercator) {
+ // test data is obtained using PostGIS:
+ // SELECT ST_AsText(ST_Transform(ST_SetSRID(ST_MakePoint(lon, lat), 4326), 3395))
+
+ CheckMercator({27.547028, 53.893962}, {3066521.12982805, 7115552.47353991});
+ CheckMercator({-70.862782, -53.002613}, {-7888408.80843475, -6949331.55685883});
+ CheckMercator({37.588536, 55.734004}, {4184336.68718463, 7470303.90973406});
+ CheckMercator({0., 0.}, {0, 0});
+ }
+
+ Y_UNIT_TEST(TestConversionMercatorToGeo) {
+ // test data is obtained using PostGIS:
+ // SELECT ST_AsText(ST_Transform(ST_SetSRID(ST_MakePoint(X, Y), 3395), 4326))
+
+ CheckGeo({3066521, 7115552}, {27.5470268337348, 53.8939594873943});
+ CheckGeo({-7888409, -6949332}, {-70.8627837208599, -53.0026154014032});
+ CheckGeo({4184336, 7470304}, {37.5885298269154, 55.734004457522});
+ CheckGeo({0, 0}, {0., 0.});
+ }
+
+ Y_UNIT_TEST(TestExactConversion) {
+ // Zero maps to zero with no epsilons
+ UNIT_ASSERT_VALUES_EQUAL(LLToMercator({0., 0.}).X(), 0.);
+ UNIT_ASSERT_VALUES_EQUAL(LLToMercator({0., 0.}).Y(), 0.);
+ UNIT_ASSERT_VALUES_EQUAL(MercatorToLL({0., 0.}).Lon(), 0.);
+ UNIT_ASSERT_VALUES_EQUAL(MercatorToLL({0., 0.}).Lat(), 0.);
+ }
+
+ // Latitudes of exactly +/-90 map to +/-infinity in Mercator Y, and back again.
+ Y_UNIT_TEST(TestPoles) {
+ UNIT_ASSERT_VALUES_EQUAL(LLToMercator({0, 90}).Y(), std::numeric_limits<double>::infinity());
+ UNIT_ASSERT_VALUES_EQUAL(LLToMercator({0, -90}).Y(), -std::numeric_limits<double>::infinity());
+
+ UNIT_ASSERT_VALUES_EQUAL(MercatorToLL({0, std::numeric_limits<double>::infinity()}).Lat(), 90.);
+ UNIT_ASSERT_VALUES_EQUAL(MercatorToLL({0, -std::numeric_limits<double>::infinity()}).Lat(), -90.);
+ }
+
+ // The tolerance passed to CheckMercator grows as the latitude approaches 90.
+ Y_UNIT_TEST(TestNearPoles) {
+ // Reference values were obtained using mpmath library (floating-point arithmetic with arbitrary precision)
+ CheckMercator({0., 89.9}, {0., 44884542.157175040}, 1.e-6);
+ CheckMercator({0., 89.99}, {0., 59570746.872518855}, 1.e-5);
+ CheckMercator({0., 89.999}, {0., 74256950.065173316}, 1.e-4);
+ CheckMercator({0., 89.9999}, {0., 88943153.242600886}, 1.e-3);
+ CheckMercator({0., 89.99999}, {0., 103629356.41987618}, 1.e-1);
+ CheckMercator({0., 89.999999}, {0., 118315559.59714996}, 1.e-1);
+ CheckMercator({0., 89.9999999}, {0., 133001762.77442373}, 1.e-0);
+ CheckMercator({0., 89.99999999}, {0., 147687965.95169749}, 1.e+1);
+ CheckMercator({0., 89.9999999999999857891452847979962825775146484375}, {0., 233563773.75716050}, 1.e+7);
+
+ CheckGeo({0., 233563773.75716050}, {0., 89.9999999999999857891452847979962825775146484375}, 1.e-15);
+ CheckGeo({0., 147687965.95169749}, {0., 89.99999999}, 1.e-13);
+ CheckGeo({0., 133001762.77442373}, {0., 89.9999999}, 1.e-13);
+ CheckGeo({0., 118315559.59714996}, {0., 89.999999}, 1.e-13);
+ CheckGeo({0., 103629356.41987618}, {0., 89.99999}, 1.e-13);
+ CheckGeo({0., 88943153.242600886}, {0., 89.9999}, 1.e-13);
+ CheckGeo({0., 74256950.065173316}, {0., 89.999}, 1.e-13);
+ CheckGeo({0., 59570746.872518855}, {0., 89.99}, 1.e-13);
+ CheckGeo({0., 44884542.157175040}, {0., 89.9}, 1.e-13);
+ }
+
+ Y_UNIT_TEST(TestVisibleRange) {
+ UNIT_ASSERT(TGeoPoint(37., 55.).IsVisibleOnMap());
+ UNIT_ASSERT(!TGeoPoint(37., 86.).IsVisibleOnMap());
+ UNIT_ASSERT(TGeoPoint(37., -85.).IsVisibleOnMap());
+ UNIT_ASSERT(!TGeoPoint(37., -90.).IsVisibleOnMap());
+ }
+
+ // Geo -> Mercator -> Geo must reproduce the input, including longitudes outside [-180, 180].
+ Y_UNIT_TEST(TestRoundTripGeoMercatorGeo) {
+ auto check = [](double longitude, double latitude) {
+ auto pt = MercatorToLL(LLToMercator(TGeoPoint{longitude, latitude}));
+ UNIT_ASSERT_DOUBLES_EQUAL_C(longitude, pt.Lon(), 1.e-12, "longitude for point (" << longitude << ", " << latitude << ")");
+ UNIT_ASSERT_DOUBLES_EQUAL_C(latitude, pt.Lat(), 1.e-8, "latitude for point (" << longitude << ", " << latitude << ")");
+ };
+
+ check(37., 55.);
+ check(0.1, 0.1);
+ check(0.2, 89.9);
+ check(181., -42.);
+ check(362., -43.);
+ check(-183., -87.);
+ check(1000., -77.);
+ }
+
+ // Mercator -> Geo -> Mercator must reproduce the input within 1.e-4.
+ Y_UNIT_TEST(TestRoundTripMercatorGeoMercator) {
+ auto check = [](double x, double y) {
+ auto pt = LLToMercator(MercatorToLL(TMercatorPoint{x, y}));
+ UNIT_ASSERT_DOUBLES_EQUAL_C(x, pt.X(), 1.e-4, "x for point (" << x << ", " << y << ")");
+ UNIT_ASSERT_DOUBLES_EQUAL_C(y, pt.Y(), 1.e-4, "y for point (" << x << ", " << y << ")");
+ };
+
+ check(100., 200.);
+ check(-123456., 654321.);
+ check(5.e7, 1.23456789);
+ check(1.e8, -2.e7);
+ }
+}
+
+// Checks GeodeticDistance on every ordered pair of four cities:
+// each point against itself gives 0, and swapping the arguments gives the same expected value.
+Y_UNIT_TEST_SUITE(TestDistance) {
+ Y_UNIT_TEST(TestGeodeticDistance) {
+ const TGeoPoint minsk(27.55, 53.916667);
+ const TGeoPoint moscow(37.617778, 55.755833);
+ const TGeoPoint newYork(-73.994167, 40.728333);
+ const TGeoPoint sydney(151.208333, -33.869444);
+
+ const double eps = 1.E-6; // absolute error
+
+ UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(minsk, minsk), 0.0, eps);
+ UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(minsk, moscow), 677190.08871321136, eps);
+ UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(minsk, newYork), 7129091.7536358498, eps);
+ UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(minsk, sydney), 15110861.267782301, eps);
+
+ UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(moscow, minsk), 677190.08871321136, eps);
+ UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(moscow, moscow), 0.0, eps);
+ UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(moscow, newYork), 7519517.2469277605, eps);
+ UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(moscow, sydney), 14467193.188083574, eps);
+
+ UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(newYork, minsk), 7129091.7536358498, eps);
+ UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(newYork, moscow), 7519517.2469277605, eps);
+ UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(newYork, newYork), 0.0, eps);
+ UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(newYork, sydney), 15954603.669226252, eps);
+
+ UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(sydney, minsk), 15110861.267782301, eps);
+ UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(sydney, moscow), 14467193.188083574, eps);
+ UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(sydney, newYork), 15954603.669226252, eps);
+ UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(sydney, sydney), 0.0, eps);
+ }
+}
diff --git a/library/cpp/geo/ut/polygon_ut.cpp b/library/cpp/geo/ut/polygon_ut.cpp
new file mode 100644
index 0000000000..cd9dee9759
--- /dev/null
+++ b/library/cpp/geo/ut/polygon_ut.cpp
@@ -0,0 +1,34 @@
+#include "polygon.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NGeo;
+
+// Tests for TGeoPolygon construction, bounding window and string parsing.
+Y_UNIT_TEST_SUITE(TGeoPolygonTest) {
+ // A default-constructed polygon is invalid.
+ Y_UNIT_TEST(TestEmptyPolygon) {
+ TGeoPolygon empty;
+ UNIT_ASSERT(!empty);
+ UNIT_ASSERT(!empty.IsValid());
+ }
+
+ // The window of a polygon spans the minimum and maximum coordinates of its points.
+ Y_UNIT_TEST(TestPolygon) {
+ TGeoPolygon polygon({{1., 2.}, {2., 1.}, {2., 4.}, {1., 3.}});
+ UNIT_ASSERT(polygon.IsValid());
+ UNIT_ASSERT_EQUAL(polygon.GetWindow(),
+ TGeoWindow(TGeoPoint(1., 1.), TGeoPoint(2., 4.)));
+ }
+
+ // Covers default and custom delimiters, a multi-character points delimiter,
+ // and rejection of an input with only two points.
+ Y_UNIT_TEST(TestParse) {
+ UNIT_ASSERT_EQUAL(TGeoPolygon::Parse(TString{"1.23,5.67 7.89,10.11 11.10,9.87"}),
+ NGeo::TGeoPolygon({{1.23, 5.67}, {7.89, 10.11}, {11.10, 9.87}}));
+ UNIT_ASSERT_EQUAL(TGeoPolygon::Parse(TString{"1.23,5.67 7.89,10.11 11.10,9.87 6.54,3.21"}),
+ NGeo::TGeoPolygon({{1.23, 5.67}, {7.89, 10.11}, {11.10, 9.87}, {6.54, 3.21}}));
+
+ UNIT_ASSERT(TGeoPolygon::TryParse(TString{"1.23,5.67 7.89,10.11"}).Empty());
+ UNIT_ASSERT_EQUAL(TGeoPolygon::Parse(TString{"1.23+5.67~7.89+10.11~11.10+9.87"}, "+", "~"),
+ NGeo::TGeoPolygon({{1.23, 5.67}, {7.89, 10.11}, {11.10, 9.87}}));
+
+ UNIT_ASSERT_EQUAL(TGeoPolygon::Parse(TString{"1.23+5.67+~7.89+10.11+~11.10+9.87"}, "+", "+~"),
+ NGeo::TGeoPolygon({{1.23, 5.67}, {7.89, 10.11}, {11.10, 9.87}}));
+ }
+}
diff --git a/library/cpp/geo/ut/size_ut.cpp b/library/cpp/geo/ut/size_ut.cpp
new file mode 100644
index 0000000000..41b4a2c257
--- /dev/null
+++ b/library/cpp/geo/ut/size_ut.cpp
@@ -0,0 +1,29 @@
+#include "size.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+#include <util/generic/maybe.h>
+
+using namespace NGeo;
+
+// Parsing tests for TSize::Parse / TSize::TryParse.
+Y_UNIT_TEST_SUITE(TSizeTest) {
+ Y_UNIT_TEST(TestFromString) {
+ UNIT_ASSERT_EQUAL(TSize::Parse("0.15,0.67"), TSize(0.15, 0.67));
+ UNIT_ASSERT_EQUAL(TSize::Parse("0.15 0.67", " "), TSize(0.15, 0.67));
+
+ // Empty, non-numeric and negative inputs are rejected.
+ UNIT_ASSERT_EXCEPTION(TSize::Parse(""), TBadCastException);
+ UNIT_ASSERT_EXCEPTION(TSize::Parse("Hello,world"), TBadCastException);
+ UNIT_ASSERT_EXCEPTION(TSize::Parse("-1,-1"), TBadCastException);
+
+ // Large values are accepted; leading whitespace is accepted, trailing whitespace is not.
+ UNIT_ASSERT_EQUAL(TSize::Parse("424242 50", " "), TSize(424242., 50.));
+ UNIT_ASSERT_EQUAL(TSize::Parse("50.,424242"), TSize(50., 424242.));
+ UNIT_ASSERT_EQUAL(TSize::Parse(" 0.01, 0.01"), TSize(0.01, 0.01));
+ UNIT_ASSERT_EXCEPTION(TSize::Parse("0.01 ,0.01"), TBadCastException);
+ UNIT_ASSERT_EXCEPTION(TSize::Parse("0.01,0.01 "), TBadCastException);
+ }
+
+ Y_UNIT_TEST(TestTryFromString) {
+ UNIT_ASSERT(TSize::TryParse("1,2"));
+ UNIT_ASSERT(!TSize::TryParse("-1,-2"));
+ UNIT_ASSERT(!TSize::TryParse("1,2a"));
+ }
+}
diff --git a/library/cpp/geo/ut/util_ut.cpp b/library/cpp/geo/ut/util_ut.cpp
new file mode 100644
index 0000000000..ebd86cfbd8
--- /dev/null
+++ b/library/cpp/geo/ut/util_ut.cpp
@@ -0,0 +1,36 @@
+#include <library/cpp/geo/util.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NGeo;
+
+// Tests for PairFromString / TryPairFromString and for the VISIBLE_LATITUDE_BOUND constant.
+Y_UNIT_TEST_SUITE(TGeoUtilTest) {
+ Y_UNIT_TEST(TestPointFromString) {
+ UNIT_ASSERT_EQUAL(PairFromString("27.56,53.90"), (std::pair<double, double>(27.56, 53.90)));
+ UNIT_ASSERT_EQUAL(PairFromString("27.56 53.90", " "), (std::pair<double, double>(27.56, 53.90)));
+ UNIT_ASSERT_EQUAL(PairFromString("27.56@@53.90", "@@"), (std::pair<double, double>(27.56, 53.90)));
+ // A delimiter that matches only part of the separator, and an empty string, are rejected.
+ UNIT_ASSERT_EXCEPTION(PairFromString("27.56@@53.90", "@"), TBadCastException);
+ UNIT_ASSERT_EXCEPTION(PairFromString(""), TBadCastException);
+ }
+
+ Y_UNIT_TEST(TestTryPointFromString) {
+ std::pair<double, double> point;
+
+ UNIT_ASSERT(TryPairFromString(point, "27.56,53.90"));
+ UNIT_ASSERT_EQUAL(point, (std::pair<double, double>(27.56, 53.90)));
+
+ UNIT_ASSERT(TryPairFromString(point, "27.56 53.90", " "));
+ UNIT_ASSERT_EQUAL(point, (std::pair<double, double>(27.56, 53.90)));
+
+ UNIT_ASSERT(TryPairFromString(point, "27.56@@53.90", "@@"));
+ UNIT_ASSERT_EQUAL(point, (std::pair<double, double>(27.56, 53.90)));
+
+ UNIT_ASSERT(!TryPairFromString(point, "27.56@@53.90", "@"));
+ UNIT_ASSERT(!TryPairFromString(point, ""));
+ }
+
+ // VISIBLE_LATITUDE_BOUND must equal the latitude whose Mercator Y equals the Mercator X of longitude 180.
+ Y_UNIT_TEST(TestVisibleMapBound) {
+ const double expectedLat = MercatorToLL(TMercatorPoint(0., LLToMercator(TGeoPoint(180., 0.)).X())).Lat();
+ UNIT_ASSERT_DOUBLES_EQUAL(VISIBLE_LATITUDE_BOUND, expectedLat, 1.e-14);
+ }
+}
diff --git a/library/cpp/geo/ut/window_ut.cpp b/library/cpp/geo/ut/window_ut.cpp
new file mode 100644
index 0000000000..194fb4e735
--- /dev/null
+++ b/library/cpp/geo/ut/window_ut.cpp
@@ -0,0 +1,547 @@
+#include "window.h"
+#include <library/cpp/testing/unittest/registar.h>
+#include <util/generic/ymath.h>
+
+using namespace NGeo;
+
+namespace {
+    constexpr double DEFAULT_EPS = 1.E-5;
+
+    // Compares two geo points coordinate-wise with tolerance eps.
+    // Prints the reason to Cerr and returns false on NaN in `found` or on mismatch.
+    bool CheckGeoPointEqual(const TGeoPoint& found, const TGeoPoint& expected, const double eps = DEFAULT_EPS) {
+        const auto foundLon = found.Lon();
+        const auto foundLat = found.Lat();
+
+        if (std::isnan(foundLon) || std::isnan(foundLat)) {
+            Cerr << "NaNs found: (" << foundLon << ", " << foundLat << ")" << Endl;
+            return false;
+        }
+        if (Abs(foundLon - expected.Lon()) > eps) {
+            Cerr << "longitude differs: " << foundLon << " found, " << expected.Lon() << " expected" << Endl;
+            return false;
+        }
+        if (Abs(foundLat - expected.Lat()) > eps) {
+            Cerr << "latitude differs: " << foundLat << " found, " << expected.Lat() << " expected" << Endl;
+            return false;
+        }
+        return true;
+    }
+
+    // Compares two sizes component-wise with tolerance eps.
+    // Prints the reason to Cerr and returns false on NaN in `found` or on mismatch.
+    bool CheckSizeEqual(const TSize& found, const TSize& expected, const double eps = DEFAULT_EPS) {
+        const auto foundWidth = found.GetWidth();
+        const auto foundHeight = found.GetHeight();
+
+        if (std::isnan(foundWidth) || std::isnan(foundHeight)) {
+            Cerr << "NaNs found: (" << foundWidth << ", " << foundHeight << ")" << Endl;
+            return false;
+        }
+        if (Abs(foundWidth - expected.GetWidth()) > eps) {
+            Cerr << "width differs: " << foundWidth << " found, " << expected.GetWidth() << " expected" << Endl;
+            return false;
+        }
+        if (Abs(foundHeight - expected.GetHeight()) > eps) {
+            Cerr << "height differs: " << foundHeight << " found, " << expected.GetHeight() << " expected" << Endl;
+            return false;
+        }
+        return true;
+    }
+
+    // Two windows are considered equal when both their centers and their sizes match within eps.
+    bool CheckGeoWindowEqual(const TGeoWindow& lhs, const TGeoWindow& rhs, const double eps = DEFAULT_EPS) {
+        if (!CheckGeoPointEqual(lhs.GetCenter(), rhs.GetCenter(), eps)) {
+            return false;
+        }
+        return CheckSizeEqual(lhs.GetSize(), rhs.GetSize(), eps);
+    }
+} // namespace
+
+/**
+ * TGeoWindow
+ */
+Y_UNIT_TEST_SUITE(TGeoWindowTest) {
+ Y_UNIT_TEST(TestParser) {
+ UNIT_ASSERT_EQUAL(TGeoWindow::ParseFromCornersPoints("1.23,5.67", "7.65,3.21"),
+ TGeoWindow(TGeoPoint(1.23, 3.21), TGeoPoint(7.65, 5.67)));
+ UNIT_ASSERT_EQUAL(TGeoWindow::ParseFromCornersPoints("1.23~5.67", "7.65~3.21", "~"),
+ TGeoWindow(TGeoPoint(1.23, 3.21), TGeoPoint(7.65, 5.67)));
+ UNIT_ASSERT_EXCEPTION(TGeoWindow::ParseFromCornersPoints("1.23~5.67", "7.65~3.21"), TBadCastException);
+
+ UNIT_ASSERT(TGeoWindow::TryParseFromCornersPoints("1.23~5.67", "7.65~3.21").Empty());
+ UNIT_ASSERT(TGeoWindow::TryParseFromCornersPoints("1.23,5.67", "7.65,3.21").Defined());
+ UNIT_ASSERT_EQUAL(TGeoWindow::TryParseFromCornersPoints("1.23,5.67", "7.65,3.21").GetRef(),
+ TGeoWindow(TGeoPoint(1.23, 3.21), TGeoPoint(7.65, 5.67)));
+ UNIT_ASSERT(TGeoWindow::TryParseFromCornersPoints("1.23+++5.67+", "7.65+++3.21+", "+++").Empty());
+
+ UNIT_ASSERT_EQUAL(TGeoWindow::ParseFromLlAndSpn("1.23,5.67", "0.1,0.2"),
+ TGeoWindow(TGeoPoint(1.23, 5.67), TSize(0.1, 0.2)));
+ UNIT_ASSERT_EQUAL(TGeoWindow::ParseFromLlAndSpn("1.23~5.67", "0.1~0.2", "~"),
+ TGeoWindow(TGeoPoint(1.23, 5.67), TSize(0.1, 0.2)));
+ UNIT_ASSERT_EXCEPTION(TGeoWindow::ParseFromLlAndSpn("1.23~5.67", "0.1~0.2"), TBadCastException);
+ UNIT_ASSERT(TGeoWindow::TryParseFromLlAndSpn("1.23~5.67", "0.1~0.2").Empty());
+ UNIT_ASSERT(TGeoWindow::TryParseFromLlAndSpn("1.23~5.67", "0.1~0.2", "~").Defined());
+ UNIT_ASSERT_EQUAL(TGeoWindow::TryParseFromLlAndSpn("1.23~5.67", "0.1~0.2", "~").GetRef(),
+ TGeoWindow(TGeoPoint(1.23, 5.67), TSize(0.1, 0.2)));
+ }
+
+ Y_UNIT_TEST(TestConstructor) {
+ TGeoPoint center{55.50, 82.50};
+ TSize size{5.00, 3.00};
+ TGeoWindow window(center, size);
+
+ UNIT_ASSERT_EQUAL(window.GetCenter(), center);
+ UNIT_ASSERT_EQUAL(window.GetSize(), size);
+ }
+
+ Y_UNIT_TEST(TestPoles) {
+ {
+ TGeoWindow northPole{TGeoPoint{180., 90.}, TSize{1.5, 1.5}};
+ UNIT_ASSERT(CheckGeoPointEqual(northPole.GetCenter(), TGeoPoint{180., 90.}));
+ UNIT_ASSERT(CheckGeoPointEqual(northPole.GetLowerLeftCorner(), TGeoPoint{179.25, 88.5}));
+ UNIT_ASSERT(CheckGeoPointEqual(northPole.GetUpperRightCorner(), TGeoPoint{180.75, 90.0}));
+ }
+ {
+ TGeoWindow tallWindow{TGeoPoint{37., 55.}, TSize{10., 180.}};
+ UNIT_ASSERT(CheckGeoPointEqual(tallWindow.GetCenter(), TGeoPoint{37., 55.}));
+ UNIT_ASSERT(CheckGeoPointEqual(tallWindow.GetLowerLeftCorner(), TGeoPoint{32., -90.}));
+ UNIT_ASSERT(CheckGeoPointEqual(tallWindow.GetUpperRightCorner(), TGeoPoint{42., 90.}));
+ }
+ {
+ TGeoWindow world{TGeoPoint{0., 0.}, TSize{360., 180.}};
+ UNIT_ASSERT(CheckGeoPointEqual(world.GetCenter(), TGeoPoint{0., 0.}));
+ UNIT_ASSERT(CheckGeoPointEqual(world.GetLowerLeftCorner(), TGeoPoint{-180., -90.}));
+ UNIT_ASSERT(CheckGeoPointEqual(world.GetUpperRightCorner(), TGeoPoint{180., 90.}));
+ }
+ {
+ TGeoWindow world{TGeoPoint{0., 0.}, TSize{360., 360.}};
+ UNIT_ASSERT(CheckGeoPointEqual(world.GetCenter(), TGeoPoint{0., 0.}));
+ UNIT_ASSERT(CheckGeoPointEqual(world.GetLowerLeftCorner(), TGeoPoint{-180., -90.}));
+ UNIT_ASSERT(CheckGeoPointEqual(world.GetUpperRightCorner(), TGeoPoint{180., 90.}));
+ }
+ }
+
+ Y_UNIT_TEST(TestBigSize) {
+ {
+ TGeoWindow w{TGeoPoint{37., 55.}, TSize{100., 179.}};
+ UNIT_ASSERT(CheckGeoPointEqual(w.GetCenter(), TGeoPoint{37., 55.}));
+ UNIT_ASSERT(CheckGeoPointEqual(w.GetLowerLeftCorner(), TGeoPoint{-13., -89.09540675}));
+ UNIT_ASSERT(CheckGeoPointEqual(w.GetUpperRightCorner(), TGeoPoint{87., 89.90907637}));
+ }
+ }
+
+ Y_UNIT_TEST(TestCenterWhenInitWithCorners) {
+ UNIT_ASSERT(CheckGeoPointEqual(TGeoWindow(TGeoPoint{5.00, 40.00}, TGeoPoint{25.00, 80.00}).GetCenter(), TGeoPoint{15.00, 67.17797}));
+ UNIT_ASSERT(CheckGeoPointEqual(TGeoWindow(TGeoPoint{-5.00, -40.00}, TGeoPoint{-25.00, -80.00}).GetCenter(), TGeoPoint{-15.00, -67.17797}));
+ }
+
+ Y_UNIT_TEST(TestCornersWhenInitWithCenter) {
+ // check lat calc
+ UNIT_ASSERT_DOUBLES_EQUAL(TGeoWindow(TGeoPoint{25.00, 50.00}, TSize{10.00, 10.00}).GetLowerLeftCorner().Lat(), 44.73927, DEFAULT_EPS);
+
+ // lat equals to 90
+ UNIT_ASSERT_DOUBLES_EQUAL(TGeoWindow(TGeoPoint{25.00, 50.00}, TSize{10.00, 179.99999}).GetUpperRightCorner().Lat(), 90, DEFAULT_EPS);
+
+ // lat equals to -90
+ UNIT_ASSERT_DOUBLES_EQUAL(TGeoWindow(TGeoPoint{25.00, -50.00}, TSize{10.00, -179.99999}).GetUpperRightCorner().Lat(), -90, DEFAULT_EPS);
+
+ // check naive lon calc
+ UNIT_ASSERT_DOUBLES_EQUAL(TGeoWindow(TGeoPoint{10, 10}, TSize{10, 5}).GetLowerLeftCorner().Lon(), 5, DEFAULT_EPS);
+
+ // check lon equals to 190 (no wrapping)
+ UNIT_ASSERT_DOUBLES_EQUAL(TGeoWindow(TGeoPoint{20, 0}, TSize{340, 5}).GetUpperRightCorner().Lon(), 190, DEFAULT_EPS);
+
+ UNIT_ASSERT_DOUBLES_EQUAL(TGeoWindow(TGeoPoint{-40, 0}, TSize{-280, 5}).GetUpperRightCorner().Lon(), -180, DEFAULT_EPS);
+
+ // naive calculating when point is (0, 0)
+ UNIT_ASSERT(CheckGeoPointEqual(TGeoWindow(TGeoPoint{0, 0}, TSize{160, 160}).GetLowerLeftCorner(), TGeoPoint{-80, -80}, DEFAULT_EPS));
+ UNIT_ASSERT(CheckGeoPointEqual(TGeoWindow(TGeoPoint{0, 0}, TSize{160, 160}).GetUpperRightCorner(), TGeoPoint{80, 80}, DEFAULT_EPS));
+ }
+
+ Y_UNIT_TEST(TestCenterSetter) {
+ TGeoPoint center{27.56, 53.90};
+ TGeoWindow window{};
+ window.SetCenter(center);
+ UNIT_ASSERT_EQUAL(window.GetCenter(), center);
+ }
+
+ Y_UNIT_TEST(TestEqualOperator) {
+ TGeoWindow window{TGeoPoint{27.56, 53.90}, TGeoPoint{30.35, 56.89}};
+ UNIT_ASSERT(window == window);
+
+ TGeoWindow anotherWindow{TGeoPoint{60.10, 57.90}, TGeoPoint{60.70, 58.25}};
+ UNIT_ASSERT(!(window == anotherWindow));
+ }
+
+ Y_UNIT_TEST(TestAssignmentOperator) {
+ TGeoWindow lhs{TGeoPoint{27.56, 53.90}, TGeoPoint{30.35, 53.89}};
+ TGeoWindow rhs{};
+ rhs = lhs;
+ UNIT_ASSERT_EQUAL(lhs, rhs);
+ }
+
+ Y_UNIT_TEST(TestContainsMethod) {
+ // you could see cases here https://tech.yandex.ru/maps/jsbox/2.1/rectangle
+ // (pay attention that the first coord is lat and the second one is lon)
+ TGeoWindow window{TGeoPoint{27.45, 53.82}, TGeoPoint{27.65, 53.97}};
+
+ // point is inside the window
+ UNIT_ASSERT(window.Contains(TGeoPoint{27.55, 53.90}));
+
+ // point is to the right of the window
+ UNIT_ASSERT(!window.Contains(TGeoPoint{27.66, 53.95}));
+
+ // point is to the left of the window
+ UNIT_ASSERT(!window.Contains(TGeoPoint{27.44, 53.95}));
+
+ // point is under the window
+ UNIT_ASSERT(!window.Contains(TGeoPoint{27.50, 53.81}));
+
+ // point is above the window
+ UNIT_ASSERT(!window.Contains(TGeoPoint{27.50, 53.98}));
+
+ // point is on border
+ UNIT_ASSERT(window.Contains(TGeoPoint{27.45, 53.86}));
+ UNIT_ASSERT(window.Contains(TGeoPoint{27.65, 53.86}));
+ UNIT_ASSERT(window.Contains(TGeoPoint{27.55, 53.82}));
+ UNIT_ASSERT(window.Contains(TGeoPoint{27.55, 53.97}));
+
+ // negative coordinates
+ UNIT_ASSERT(TGeoWindow(TGeoPoint{-72.17, -38.82}, TGeoPoint{-68.95, -36.70}).Contains(TGeoPoint{-70.40, -37.75}));
+
+ // special cases
+ UNIT_ASSERT(!TGeoWindow{}.Contains(TGeoPoint{60.09, 57.90}));
+
+ UNIT_ASSERT(TGeoWindow(TGeoPoint{}, TGeoPoint{27.55, 53.90}).Contains(TGeoPoint{27.55, 53.90}));
+ UNIT_ASSERT(TGeoWindow(TGeoPoint{27.55, 53.90}, TGeoPoint{}).Contains(TGeoPoint{27.55, 53.90}));
+ }
+
+ Y_UNIT_TEST(TestIntersectsMethod) {
+ // intersect only by lat
+ UNIT_ASSERT(
+ !Intersects(
+ TGeoWindow{TGeoPoint{27.60, 53.90}, TGeoPoint{27.80, 53.95}},
+ TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}}));
+
+ // intersect only by lon
+ UNIT_ASSERT(
+ !Intersects(
+ TGeoWindow{TGeoPoint{27.35, 54}, TGeoPoint{27.45, 54.10}},
+ TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}}));
+
+ // one inside another
+ UNIT_ASSERT(
+ Intersects(
+ TGeoWindow{TGeoPoint{27.35, 53.90}, TGeoPoint{27.45, 53.95}},
+ TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}}));
+
+ // intersection is point
+ UNIT_ASSERT(
+ !Intersects(
+ TGeoWindow{TGeoPoint{27.50, 53.98}, TGeoPoint{27.70, 54.00}},
+ TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}}));
+
+ // intersection is segment
+ UNIT_ASSERT(
+ !Intersects(
+ TGeoWindow{TGeoPoint{27.40, 53.98}, TGeoPoint{27.70, 54.00}},
+ TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}}));
+
+ // intersection is area
+ UNIT_ASSERT(
+ Intersects(
+ TGeoWindow{TGeoPoint{27.40, 53.90}, TGeoPoint{27.70, 54.00}},
+ TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}}));
+
+ // equal windows
+ TGeoWindow window{TGeoPoint{27.60, 53.88}, TGeoPoint{27.80, 53.98}};
+ UNIT_ASSERT(Intersects(window, window));
+ }
+
+ Y_UNIT_TEST(TestIntersectionMethod) {
+ // non-intersecting window
+ UNIT_ASSERT(
+ !(Intersection(
+ TGeoWindow{TGeoPoint{37.66, 55.66}, TGeoPoint{37.53, 55.64}},
+ TGeoWindow{TGeoPoint{37.67, 55.66}, TGeoPoint{37.69, 55.71}})));
+
+ // one inside another
+ UNIT_ASSERT(CheckGeoWindowEqual(
+ Intersection(
+ TGeoWindow{TGeoPoint{37.00, 55.00}, TSize{10.00, 10.00}},
+ TGeoWindow{TGeoPoint{37.00, 55.00}, TSize{2.00, 2.00}})
+ .GetRef(),
+ (TGeoWindow{TGeoPoint{37.00, 55.00}, TSize{2.00, 2.00}})));
+
+ // cross
+ UNIT_ASSERT(CheckGeoWindowEqual(
+ Intersection(
+ TGeoWindow{TGeoPoint{37.00, 55.00}, TSize{10.00, 2.00}},
+ TGeoWindow{TGeoPoint{37.00, 55.00}, TSize{2.00, 10.00}})
+ .GetRef(),
+ (TGeoWindow{TGeoPoint{37.00, 55.00}, TSize{2.00, 2.00}})));
+
+ // intersection is a point
+ UNIT_ASSERT(CheckGeoWindowEqual(
+ Intersection(
+ TGeoWindow{TGeoPoint{27.50, 53.98}, TGeoPoint{27.70, 54.00}},
+ TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}})
+ .GetRef(),
+ (TGeoWindow{TGeoPoint{27.50, 53.98}, TSize{0, 0}})));
+
+ // intersection is a segment
+ UNIT_ASSERT(CheckGeoWindowEqual(
+ Intersection(
+ TGeoWindow{TGeoPoint{27.40, 53.98}, TGeoPoint{27.70, 54.00}},
+ TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}})
+ .GetRef(),
+ (TGeoWindow{TGeoPoint{27.45, 53.98}, TSize{0.10, 0}})));
+
+ // intersection is area
+ UNIT_ASSERT(CheckGeoWindowEqual(
+ Intersection(
+ TGeoWindow{TGeoPoint{27.40, 53.90}, TGeoPoint{27.70, 54.00}},
+ TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}})
+ .GetRef(),
+ (TGeoWindow{TGeoPoint{27.40, 53.90}, TGeoPoint{27.50, 53.98}})));
+
+ // special cases
+ UNIT_ASSERT(
+ !(Intersection(
+ TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}},
+ TGeoWindow{})));
+ }
+
+ Y_UNIT_TEST(TestDistanceMethod) {
+ // one window inside another
+ UNIT_ASSERT_DOUBLES_EQUAL(
+ (TGeoWindow{TGeoPoint{27.50, 53.98}, TGeoPoint{27.80, 54.10}})
+ .Distance(TGeoWindow{TGeoPoint{27.55, 54.00}, TGeoPoint{27.70, 54.07}}),
+ 0,
+ 1.E-5);
+
+ // gap only by lon
+ UNIT_ASSERT_DOUBLES_EQUAL(
+ (TGeoWindow{TGeoPoint{27.50, 53.98}, TGeoPoint{27.60, 54.10}})
+ .Distance(TGeoWindow{TGeoPoint{27.69, 54.10}, TGeoPoint{27.90, 54.20}}),
+ 0.052773,
+ 1.E-5);
+
+ // gap only by lat
+ UNIT_ASSERT_DOUBLES_EQUAL(
+ (TGeoWindow{TGeoPoint{27.50, 53.98}, TGeoPoint{27.60, 54.10}})
+ .Distance(TGeoWindow{TGeoPoint{27.50, 54.20}, TGeoPoint{27.70, 54.30}}),
+ 0.1,
+ 1.E-5);
+
+ // gap by both lon and lat; the answer can be derived from the two previous tests
+ UNIT_ASSERT_DOUBLES_EQUAL(
+ (TGeoWindow{TGeoPoint{27.50, 53.98}, TGeoPoint{27.60, 54.10}}
+ .Distance(TGeoWindow{TGeoPoint{27.69, 54.20}, TGeoPoint{27.70, 54.30}})),
+ 0.11304,
+ 1.E-5);
+
+ // negative coordinates
+ UNIT_ASSERT_DOUBLES_EQUAL(
+ (TGeoWindow{TGeoPoint{-27.50, -53.98}, TGeoPoint{-27.60, -54.10}}
+ .Distance(TGeoWindow{TGeoPoint{-27.69, -54.20}, TGeoPoint{-27.70, -54.30}})),
+ 0.11304,
+ 1.E-5);
+ }
+
+ Y_UNIT_TEST(TestApproxDistanceMethod) {
+ // point inside
+ UNIT_ASSERT_DOUBLES_EQUAL(
+ (TGeoWindow{TGeoPoint{27.50, 53.98}, TGeoPoint{27.80, 54.10}})
+ .GetApproxDistance(TGeoPoint{27.60, 54.05}),
+ 0,
+ 1.E-5);
+
+ // gap only by lon
+ UNIT_ASSERT_DOUBLES_EQUAL(
+ (TGeoWindow{TGeoPoint{27.50, 54.00}, TGeoPoint{27.60, 54.10}})
+ .GetApproxDistance(TGeoPoint{27.70, 54.05}),
+ 6535.3,
+ 0.1);
+
+ // gap only by lat
+ UNIT_ASSERT_DOUBLES_EQUAL(
+ (TGeoWindow{TGeoPoint{27.50, 54.00}, TGeoPoint{27.60, 54.10}})
+ .GetApproxDistance(TGeoPoint{27.55, 53.95}),
+ 5566.0,
+ 0.1);
+
+ // gap by both lon and lat
+ UNIT_ASSERT_DOUBLES_EQUAL(
+ (TGeoWindow{TGeoPoint{27.50, 54.00}, TGeoPoint{27.60, 54.10}})
+ .GetApproxDistance(TGeoPoint{27.70, 54.20}),
+ 12900.6,
+ 0.1);
+
+ // negative coordinates
+ UNIT_ASSERT_DOUBLES_EQUAL(
+ (TGeoWindow{TGeoPoint{-27.50, -54.00}, TGeoPoint{-27.60, -54.10}})
+ .GetApproxDistance(TGeoPoint{-27.70, -54.20}),
+ 12900.6,
+ 0.1);
+ }
+
+ Y_UNIT_TEST(TestUnionMethod) {
+ // one inside another
+ UNIT_ASSERT(CheckGeoWindowEqual(
+ Union(
+ TGeoWindow{TGeoPoint{37.00, 55.00}, TSize{2.00, 3.00}},
+ TGeoWindow{TGeoPoint{37.10, 55.20}, TSize{1.50, 1.00}}),
+ TGeoWindow(TGeoPoint{37.00, 55.00}, TSize{2.00, 3.00})));
+
+ // non-intersecting windows
+ UNIT_ASSERT(CheckGeoWindowEqual(
+ Union(
+ TGeoWindow{TGeoPoint{37.00, 55.00}, TGeoPoint{37.10, 55.10}},
+ TGeoWindow{TGeoPoint{37.20, 55.20}, TGeoPoint{37.30, 55.30}}),
+ TGeoWindow(TGeoPoint{37.00, 55.00}, TGeoPoint{37.30, 55.30})));
+
+ // negative coordinates, one inside another
+ UNIT_ASSERT(CheckGeoWindowEqual(
+ Union(
+ TGeoWindow{TGeoPoint{-57.62, -20.64}, TSize{2.00, 4.00}},
+ TGeoWindow{TGeoPoint{-57.62, -20.64}, TSize{12.00, 10.00}}),
+ TGeoWindow(TGeoPoint{-57.62, -20.64}, TSize{12.00, 10.00}), 1.E-2));
+
+ // cross
+ UNIT_ASSERT(CheckGeoWindowEqual(
+ Union(
+ TGeoWindow{TGeoPoint{-3.82, 5.52}, TGeoPoint{0.10, 6.50}},
+ TGeoWindow{TGeoPoint{-1.5, 4.20}, TGeoPoint{-0.5, 7.13}}),
+ TGeoWindow(TGeoPoint{-3.82, 4.20}, TGeoPoint{0.10, 7.13})));
+
+ // special cases
+ UNIT_ASSERT(CheckGeoWindowEqual(
+ Union(
+ TGeoWindow{TGeoPoint{-3.82, 5.52}, TGeoPoint{0.10, 6.50}},
+ TGeoWindow{}),
+ TGeoWindow(TGeoPoint{-3.82, 5.52}, TGeoPoint{361., 181.})));
+
+ UNIT_ASSERT(CheckGeoWindowEqual(
+ Union(
+ TGeoWindow{},
+ TGeoWindow{TGeoPoint{-3.82, 5.52}, TGeoPoint{0.10, 6.50}}),
+ TGeoWindow(TGeoPoint{-3.82, 5.52}, TGeoPoint{361., 181.})));
+ }
+
+ Y_UNIT_TEST(TestStretchMethod) {
+ TSize size{0.5, 1};
+ TGeoPoint center{27.40, 53.90};
+ TGeoWindow window{};
+ double multiplier = 0;
+
+ // multiplier is less than 1.
+ window = {center, size};
+ multiplier = 0.5;
+
+ UNIT_ASSERT(CheckGeoPointEqual(window.GetLowerLeftCorner(), TGeoPoint{27.14999, 53.39699}));
+ UNIT_ASSERT(CheckGeoPointEqual(window.GetUpperRightCorner(), TGeoPoint{27.65000, 54.39699}));
+
+ window.Stretch(multiplier);
+ UNIT_ASSERT(CheckGeoWindowEqual(window, TGeoWindow{center, TSize{0.25, 0.5}}));
+ UNIT_ASSERT(CheckGeoPointEqual(window.GetLowerLeftCorner(), TGeoPoint{27.27499, 53.64925}));
+ UNIT_ASSERT(CheckGeoPointEqual(window.GetUpperRightCorner(), TGeoPoint{27.52500, 54.14924}));
+
+ // multiplier is greater than 1.
+ window = {center, size};
+ multiplier = 2.2;
+
+ window.Stretch(multiplier);
+ UNIT_ASSERT(CheckGeoWindowEqual(window, TGeoWindow{center, TSize{1.1, 2.2}}));
+ UNIT_ASSERT(CheckGeoPointEqual(window.GetLowerLeftCorner(), TGeoPoint{26.84999, 52.78545}));
+ UNIT_ASSERT(CheckGeoPointEqual(window.GetUpperRightCorner(), TGeoPoint{27.95000, 54.98545}));
+
+ // invalid multiplier
+ window = {center, size};
+ multiplier = 100.;
+
+ window.Stretch(multiplier);
+ UNIT_ASSERT(CheckGeoWindowEqual(window, TGeoWindow{center, TSize{50, 100}}));
+ UNIT_ASSERT(CheckGeoPointEqual(window.GetLowerLeftCorner(), TGeoPoint{2.40000, -18.88352}));
+ UNIT_ASSERT(CheckGeoPointEqual(window.GetUpperRightCorner(), TGeoPoint{52.39999, 81.26212}));
+
+ // invalid multiplier
+ window = {center, size};
+ multiplier = 0;
+
+ window.Stretch(multiplier);
+ UNIT_ASSERT(CheckGeoWindowEqual(window, TGeoWindow{center, TSize{0, 0}}));
+ UNIT_ASSERT(CheckGeoPointEqual(window.GetLowerLeftCorner(), TGeoPoint{27.39999, 53.90000}));
+ UNIT_ASSERT(CheckGeoPointEqual(window.GetUpperRightCorner(), TGeoPoint{27.39999, 53.90000}));
+
+ // invalid multiplier
+ window = {center, size};
+ multiplier = -5.;
+
+ window.Stretch(multiplier);
+ UNIT_ASSERT(CheckGeoWindowEqual(window, TGeoWindow{center, TSize{-2.5, -5}}));
+ UNIT_ASSERT(CheckGeoPointEqual(window.GetLowerLeftCorner(), TGeoPoint{28.64999, 56.32495}));
+ UNIT_ASSERT(CheckGeoPointEqual(window.GetUpperRightCorner(), TGeoPoint{26.15000, 51.32491}));
+ }
+}
+
+/**
+ * TMercatorWindow
+ */
+Y_UNIT_TEST_SUITE(TMercatorWindowTest) {
+ Y_UNIT_TEST(TestConstructor) {
+ // init with two corners
+ TMercatorPoint lowerLeft{5, 3};
+ TMercatorPoint upperRight{10, 20};
+ TMercatorWindow window{lowerLeft, upperRight};
+
+ UNIT_ASSERT_EQUAL(window.GetWidth(), 5.);
+ UNIT_ASSERT_EQUAL(window.GetHeight(), 17.);
+ UNIT_ASSERT_EQUAL(window.GetCenter(), (TMercatorPoint{7.5, 11.5}));
+
+ TMercatorPoint center{8, 12};
+ TSize size{5, 17};
+ window = {center, size};
+ UNIT_ASSERT_EQUAL(window.GetUpperRightCorner().X(), 10.5);
+ UNIT_ASSERT_EQUAL(window.GetUpperRightCorner().Y(), 20.5);
+ UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner().X(), 5.5);
+ UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner().Y(), 3.5);
+ }
+
+ Y_UNIT_TEST(TestInflateMethod) {
+ TSize size{200, 500};
+ TMercatorPoint center{441, 688};
+ TMercatorWindow window{};
+ int add = 10;
+
+ window = {center, size};
+ UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner(), TMercatorPoint(341, 438));
+ UNIT_ASSERT_EQUAL(window.GetUpperRightCorner(), TMercatorPoint(541, 938));
+ window.Inflate(add);
+ UNIT_ASSERT_EQUAL(window, TMercatorWindow(center, TSize{220, 520}));
+ UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner(), TMercatorPoint(331, 428));
+ UNIT_ASSERT_EQUAL(window.GetUpperRightCorner(), TMercatorPoint(551, 948));
+
+ // negative coordinates
+ center = {-441, -688};
+ window = {center, size};
+ UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner(), TMercatorPoint(-541, -938));
+ UNIT_ASSERT_EQUAL(window.GetUpperRightCorner(), TMercatorPoint(-341, -438));
+ window.Inflate(add);
+ UNIT_ASSERT_EQUAL(window, TMercatorWindow(center, TSize{220, 520}));
+ UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner(), TMercatorPoint(-551, -948));
+ UNIT_ASSERT_EQUAL(window.GetUpperRightCorner(), TMercatorPoint(-331, -428));
+
+ // size becomes negative
+ size = {6, 12};
+ center = {0, 0};
+ window = {center, size};
+ UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner(), TMercatorPoint(-3, -6));
+ UNIT_ASSERT_EQUAL(window.GetUpperRightCorner(), TMercatorPoint(3, 6));
+
+ add = -20;
+ window.Inflate(add);
+ UNIT_ASSERT_EQUAL(window, TMercatorWindow(center, TSize{-34, -28}));
+ UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner(), TMercatorPoint(17, 14));
+ UNIT_ASSERT_EQUAL(window.GetUpperRightCorner(), TMercatorPoint(-17, -14));
+ UNIT_ASSERT_EQUAL(window.GetSize(), TSize(-34, -28));
+
+ // big add param
+ size = {10, 15};
+ center = {5, 10};
+ window = {center, size};
+
+ add = static_cast<int>(1E5);
+ window.Inflate(add);
+ UNIT_ASSERT_EQUAL(window, TMercatorWindow(center, TSize{200'010, 200'015}));
+ UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner(), TMercatorPoint(-100'000, -99'997.5));
+ UNIT_ASSERT_EQUAL(window.GetUpperRightCorner(), TMercatorPoint(100'010, 100'017.5));
+ }
+}
diff --git a/library/cpp/geo/ut/ya.make b/library/cpp/geo/ut/ya.make
new file mode 100644
index 0000000000..5bd891db1f
--- /dev/null
+++ b/library/cpp/geo/ut/ya.make
@@ -0,0 +1,12 @@
+# Unit tests for library/cpp/geo; one *_ut.cpp source per tested component.
+UNITTEST_FOR(library/cpp/geo)
+
+SRCS(
+ load_save_helper_ut.cpp
+ polygon_ut.cpp
+ point_ut.cpp
+ size_ut.cpp
+ util_ut.cpp
+ window_ut.cpp
+)
+
+END()
diff --git a/library/cpp/geo/util.cpp b/library/cpp/geo/util.cpp
new file mode 100644
index 0000000000..e8d0fc378e
--- /dev/null
+++ b/library/cpp/geo/util.cpp
@@ -0,0 +1,34 @@
+#include "util.h"
+
+#include <math.h>
+#include <util/generic/cast.h>
+#include <util/generic/string.h>
+#include <util/string/cast.h>
+#include <utility>
+
+namespace NGeo {
+    /**
+     * Splits inputStr with TStringBuf::TrySplit on delimiter and parses both
+     * parts as doubles. On success stores the two values into res and returns
+     * true; on any failure returns false and leaves res untouched.
+     */
+    bool TryPairFromString(std::pair<double, double>& res, TStringBuf inputStr, TStringBuf delimiter) {
+        TStringBuf firstPart;
+        TStringBuf secondPart;
+        if (!inputStr.TrySplit(delimiter, firstPart, secondPart)) {
+            return false;
+        }
+
+        double first = NAN;
+        double second = NAN;
+        if (!TryFromString<double>(firstPart, first)) {
+            return false;
+        }
+        if (!TryFromString<double>(secondPart, second)) {
+            return false;
+        }
+
+        res = {first, second};
+        return true;
+    }
+
+    /**
+     * Throwing counterpart of TryPairFromString: returns the parsed pair or
+     * throws TBadCastException carrying the offending input string.
+     */
+    std::pair<double, double> PairFromString(TStringBuf inputStr, TStringBuf delimiter) {
+        std::pair<double, double> parsed;
+        const bool ok = TryPairFromString(parsed, inputStr, delimiter);
+        if (ok) {
+            return parsed;
+        }
+        ythrow TBadCastException() << "Wrong point string: " << inputStr;
+    }
+} // namespace NGeo
diff --git a/library/cpp/geo/util.h b/library/cpp/geo/util.h
new file mode 100644
index 0000000000..18b411e6a4
--- /dev/null
+++ b/library/cpp/geo/util.h
@@ -0,0 +1,107 @@
+#pragma once
+
+#include "point.h"
+#include "size.h"
+#include "window.h"
+
+#include <util/generic/ymath.h>
+
+namespace NGeo {
+ // Bounds of the latitude/longitude coordinate range and the world extent derived from them.
+ constexpr double MIN_LATITUDE = -90.;
+ constexpr double MAX_LATITUDE = +90.;
+ constexpr double MIN_LONGITUDE = -180.;
+ constexpr double MAX_LONGITUDE = +180.;
+ constexpr double WORLD_WIDTH = MAX_LONGITUDE - MIN_LONGITUDE;
+ constexpr double WORLD_HEIGHT = MAX_LATITUDE - MIN_LATITUDE;
+
+ // The Mercator projection is truncated at certain latitude so that the visible world forms a square. The poles are not shown.
+ constexpr double VISIBLE_LATITUDE_BOUND = 85.084059050109785;
+
+ // Converts an angle from degrees to radians.
+ inline double Deg2rad(double d) {
+ return d * PI / 180;
+ }
+
+ // Converts an angle from radians to degrees.
+ inline double Rad2deg(double d) {
+ return d * 180 / PI;
+ }
+
+ // Converts a length in meters to an angle in degrees: the length is divided
+ // by WGS84::R and the resulting radians are converted to degrees.
+ inline double GetLongitudeFromMetersAtEquator(double meters) {
+ return Rad2deg(meters * (1. / WGS84::R));
+ }
+
+ // Inverse conversion: an angle in degrees is turned into radians and
+ // multiplied by WGS84::R.
+ inline double GetMetersFromDeg(double angle) {
+ return Deg2rad(angle) * NGeo::WGS84::R;
+ }
+
+ // Cosine of a latitude given in degrees.
+ inline double GetLatCos(double latDegree) {
+ return cos(Deg2rad(latDegree));
+ }
+
+ /**
+ * Returns the reciprocal of the cosine of a latitude given in degrees.
+ * Multiplying by it is more precise than dividing two big doubles.
+ * The cosine is clamped from below by 0.001, so the result stays finite
+ * for a latitude of 90 degrees.
+ */
+ inline double GetInversedLatCosSafe(double latDegree) {
+ return 1. / Max(0.001, cos(Deg2rad(latDegree)));
+ }
+
+ /**
+ * Gets the longitude width at the given latitude for a width specified at the equator
+ */
+ inline double GetWidthAtLatitude(double widthEquator, double latDegree) {
+ return widthEquator * GetInversedLatCosSafe(latDegree);
+ }
+
+ // Overload that takes the latitude from the point p.
+ inline double GetWidthAtLatitude(double widthEquator, const TGeoPoint& p) {
+ return GetWidthAtLatitude(widthEquator, p.Lat());
+ }
+
+ /*
+ * Returns the normalised width at the equator for a width specified at the given latitude
+ * (the width is multiplied by the cosine of the latitude)
+ */
+
+ inline double GetWidthAtEquator(double widthAtLatitude, double latDegree) {
+ return widthAtLatitude * GetLatCos(latDegree);
+ }
+
+ // Overload that takes the latitude from the point p.
+ inline double GetWidthAtEquator(double widthAtLatitude, const TGeoPoint& p) {
+ return GetWidthAtEquator(widthAtLatitude, p.Lat());
+ }
+
+    /*
+     * Width conversions between the equator and a latitude, applied to a whole size:
+     * only the width is rescaled, the height is passed through unchanged.
+     */
+
+    inline TSize GetSizeAtLatitude(const TSize& sizeAtEquator, const TGeoPoint& at) {
+        const double widthAtLatitude = GetWidthAtLatitude(sizeAtEquator.GetWidth(), at);
+        return TSize(widthAtLatitude, sizeAtEquator.GetHeight());
+    }
+
+    inline TSize GetSizeAtEquator(const TSize& sizeAtLatitude, const TGeoPoint& at) {
+        const double widthAtEquator = GetWidthAtEquator(sizeAtLatitude.GetWidth(), at);
+        return TSize(widthAtEquator, sizeAtLatitude.GetHeight());
+    }
+
+    // Builds a window around center whose size is given at the equator and is
+    // rescaled to the latitude of the center.
+    inline TGeoWindow ConstructWindowFromEquatorSize(const TGeoPoint& center, const TSize& sizeAtEquator) {
+        const TSize sizeAtCenter = GetSizeAtLatitude(sizeAtEquator, center);
+        return TGeoWindow(center, sizeAtCenter);
+    }
+
+    // Squared diagonal of size, with the width first normalised to the equator
+    // for the given latitude.
+    inline double SquaredDiagonal(const NGeo::TSize& size, double latitude) {
+        const double equatorWidth = NGeo::GetWidthAtEquator(size.GetWidth(), latitude);
+        const auto height = size.GetHeight();
+        return Sqr(equatorWidth) + Sqr(height);
+    }
+
+    // Square root of SquaredDiagonal().
+    inline double Diagonal(const NGeo::TSize& size, double latitude) {
+        const double squared = SquaredDiagonal(size, latitude);
+        return sqrt(squared);
+    }
+
+ /**
+ * try to parse two coords from string
+ * return pair of coords on success, otherwise throw exception
+ */
+ std::pair<double, double> PairFromString(TStringBuf inputStr, TStringBuf delimiter = TStringBuf(","));
+
+ /**
+ * try to parse two coords from string
+ * write result to first param and return true on success, otherwise return false
+ */
+ bool TryPairFromString(std::pair<double, double>& res, TStringBuf inputStr, TStringBuf delimiter = TStringBuf(","));
+} // namespace NGeo
diff --git a/library/cpp/geo/window.cpp b/library/cpp/geo/window.cpp
new file mode 100644
index 0000000000..2ad2b61b71
--- /dev/null
+++ b/library/cpp/geo/window.cpp
@@ -0,0 +1,297 @@
+#include "window.h"
+
+#include "util.h"
+
+#include <util/generic/ylimits.h>
+#include <util/generic/ymath.h>
+#include <util/generic/maybe.h>
+
+#include <cstdlib>
+#include <utility>
+
+namespace NGeo {
+    namespace {
+        // Midpoint of the segment between p1 and p2 (arithmetic mean per coordinate).
+        TMercatorPoint GetMiddlePoint(const TMercatorPoint& p1, const TMercatorPoint& p2) {
+            const auto midX = (p1.X() + p2.X()) / 2;
+            const auto midY = (p1.Y() + p2.Y()) / 2;
+            return TMercatorPoint{midX, midY};
+        }
+
+        // Lower and upper latitude of a span, as produced by Span2Latitude().
+        struct TLatBounds {
+            double LatMin;
+            double LatMax;
+        };
+    } // namespace
+
+    /**
+     * Handles the cases Span2Latitude() does not solve numerically:
+     * a span of 180 degrees or more, and a center lying exactly on a pole.
+     * Fills result and returns true when one of them applies, otherwise
+     * returns false and leaves result untouched.
+     */
+    bool TrySpan2LatitudeDegenerateCases(double ll, double lspan, TLatBounds& result) {
+        // TODO(sobols@): Compare with eps?
+        if (Y_UNLIKELY(lspan >= 180.)) {
+            // The span covers the whole latitude range.
+            result = TLatBounds{-90., +90.};
+            return true;
+        }
+        if (Y_UNLIKELY(ll == +90.)) {
+            // Center on the north pole: the span extends downwards only.
+            result = TLatBounds{ll - lspan, ll};
+            return true;
+        }
+        if (Y_UNLIKELY(ll == -90.)) {
+            // Center on the south pole: the span extends upwards only.
+            result = TLatBounds{ll, ll + lspan};
+            return true;
+        }
+        return false;
+    }
+
+ /**
+ * Finds such latitudes lmin, lmax that:
+ * 1) lmin <= ll <= lmax,
+ * 2) lmax - lmin == lspan,
+ * 3) MercatorY(ll) - MercatorY(lmin) == MercatorY(lmax) - MercatorY(ll)
+ * (the ll parallel is a center between lmin and lmax parallels in Mercator projection)
+ *
+ * Degenerate inputs are delegated to TrySpan2LatitudeDegenerateCases().
+ *
+ * \returns the bounds (lmin, lmax) as TLatBounds
+ */
+ TLatBounds Span2Latitude(double ll, double lspan) {
+ TLatBounds result{};
+ if (TrySpan2LatitudeDegenerateCases(ll, lspan, result)) {
+ return result;
+ }
+
+ const double lc = Deg2rad(ll);
+ const double h = Deg2rad(lspan);
+
+ // Spherical (Pseudo) Mercator:
+ // MercatorY(lc) = R * ln(tan(lc / 2 + PI / 4)).
+ // Note that
+ // ln(a) - ln(b) = ln(a / b)
+ // That's why
+ // MercatorY(lc) - MercatorY(lmin) == MercatorY(lmin + h) - MercatorY(lc) <=>
+ // <=> tan(lc / 2 + PI / 4) / tan(lmin / 2 + PI / 4) ==
+ // == tan(lmin / 2 + h / 2 + PI / 4) / tan(lc / 2 + PI / 4).
+ // Also note that
+ // tan(x + y) == (tan(x) + tan(y)) / (1 - tan(x) * tan(y)),
+ // so
+ // tan(lmin / 2 + h / 2 + PI / 4) ==
+ // == (tan(lmin / 2 + PI / 4) + tan(h / 2)) / (1 - tan(lmin / 2 + PI / 4) * tan(h / 2))
+
+ const double yx = tan(lc / 2 + PI / 4);
+
+ // Let x be tan(lmin / 2 + PI / 4),
+ // then
+ // yx / x == (x + tan(h / 2)) / ((1 - x * tan(h / 2)) * yx),
+ // or
+ // yx^2 * (1 - x * tan(h / 2)) == (x + tan(h / 2)) * x.
+ // Now we solve a quadratic equation:
+ // x^2 + bx + c == 0
+
+ const double C = yx * yx;
+
+ const double b = (C + 1) * tan(h / 2), c = -C;
+ const double D = b * b - 4 * c;
+ // c == -C <= 0, so D >= b^2 and the larger root taken here is non-negative.
+ const double root = (-b + sqrt(D)) / 2;
+
+ // Invert x == tan(lmin / 2 + PI / 4) to get lmin, then convert to degrees.
+ result.LatMin = Rad2deg((atan(root) - PI / 4) * 2);
+ result.LatMax = result.LatMin + lspan;
+ return result;
+ }
+
+    /**
+     * Recomputes LowerLeftCorner_ and UpperRightCorner_ from Center_ and Size_.
+     * Does nothing when the window is not valid.
+     */
+    void TGeoWindow::CalcCorners() {
+        if (!IsValid()) {
+            return;
+        }
+
+        const TLatBounds lat = Span2Latitude(Center_.Lat(), Size_.GetHeight());
+        const auto westLon = Center_.Lon() - (Size_.GetWidth() / 2);
+        const auto eastLon = Center_.Lon() + (Size_.GetWidth() / 2);
+
+        const bool strictlyBetweenPoles = -90. < lat.LatMin && lat.LatMax < +90.;
+        if (!strictlyBetweenPoles) {
+            // A bound lies on (or beyond) a pole: use the geographic corners as they are.
+            LowerLeftCorner_ = TGeoPoint(westLon, lat.LatMin);
+            UpperRightCorner_ = TGeoPoint(eastLon, lat.LatMax);
+            return;
+        }
+
+        // Measure the extent in Mercator space and lay it out symmetrically
+        // around the Mercator image of the center.
+        const TMercatorPoint lowerLeftM = LLToMercator(TGeoPoint(westLon, lat.LatMin));
+        const TMercatorPoint upperRightM = LLToMercator(TGeoPoint(eastLon, lat.LatMax));
+        const TMercatorPoint centerM = LLToMercator(Center_);
+
+        const double width = upperRightM.X() - lowerLeftM.X();
+        const double height = upperRightM.Y() - lowerLeftM.Y();
+
+        LowerLeftCorner_ = MercatorToLL(TMercatorPoint(centerM.X() - width / 2, centerM.Y() - height / 2));
+        UpperRightCorner_ = MercatorToLL(TMercatorPoint(centerM.X() + width / 2, centerM.Y() + height / 2));
+    }
+
+    /**
+     * Recomputes Center_ and Size_ from the two corners. The center is the
+     * midpoint of the corners in Mercator space; the size is the plain
+     * difference of the corner coordinates. Does nothing if either corner
+     * tests false.
+     */
+    void TGeoWindow::CalcCenterAndSpan() {
+        if (!LowerLeftCorner_ || !UpperRightCorner_) {
+            return;
+        }
+
+        const TMercatorPoint lowerM = LLToMercator(LowerLeftCorner_);
+        const TMercatorPoint upperM = LLToMercator(UpperRightCorner_);
+        Center_ = MercatorToLL(GetMiddlePoint(lowerM, upperM));
+
+        const auto lonSpan = UpperRightCorner_.Lon() - LowerLeftCorner_.Lon();
+        const auto latSpan = UpperRightCorner_.Lat() - LowerLeftCorner_.Lat();
+        Size_ = TSize(lonSpan, latSpan);
+    }
+
+    // True when p lies inside the window or on its border (all comparisons are inclusive).
+    bool TGeoWindow::Contains(const TGeoPoint& p) const {
+        const bool lonInside = LowerLeftCorner_.Lon() <= p.Lon() && p.Lon() <= UpperRightCorner_.Lon();
+        const bool latInside = LowerLeftCorner_.Lat() <= p.Lat() && p.Lat() <= UpperRightCorner_.Lat();
+        return lonInside && latInside;
+    }
+
+ // Length of the window diagonal, computed by Diagonal() from the window
+ // size and the latitude of the window center.
+ double TGeoWindow::Diameter() const {
+ return Diagonal(Size_, Center_.Lat());
+ }
+
+    /**
+     * Approximate distance between this window and w, in the same units as the
+     * window coordinates (degrees).
+     *
+     * The gap along each axis is the distance between the nearest edges, or 0
+     * when the windows overlap or touch along that axis. The longitude gap is
+     * scaled by the cosine of the mean of the two facing latitudes before the
+     * gaps are combined into a Euclidean length.
+     *
+     * \returns 0 when the windows overlap or touch, otherwise a positive distance
+     */
+    double TGeoWindow::Distance(const TGeoWindow& w) const {
+        const double minX = Max(GetLowerLeftCorner().Lon(), w.GetLowerLeftCorner().Lon());
+        const double maxX = Min(GetUpperRightCorner().Lon(), w.GetUpperRightCorner().Lon());
+        const double minY = Max(GetLowerLeftCorner().Lat(), w.GetLowerLeftCorner().Lat());
+        const double maxY = Min(GetUpperRightCorner().Lat(), w.GetUpperRightCorner().Lat());
+        const double xGap = minX > maxX ? (minX - maxX) : 0.;
+        const double yGap = minY > maxY ? (minY - maxY) : 0.;
+        // Double-precision sqrt on purpose: sqrtf would round both the operand
+        // and the result to float although everything here is computed in double.
+        return sqrt(Sqr(xGap * cos((minY + maxY) * 0.5 * PI / 180)) + Sqr(yGap));
+    }
+
+    /**
+     * Approximate distance from the window to a point: the point is wrapped
+     * into a degenerate window, Distance() is applied to it, and the result is
+     * multiplied by WGS84::R * PI / 180.
+     */
+    double TWindowLL::GetApproxDistance(const TPointLL& point) const {
+        const TWindowLL pointWindow{point, point};
+        const double metresInDegree = WGS84::R * PI / 180;
+        return Distance(pointWindow) * metresInDegree;
+    }
+
+    // Parses two corner points with TGeoPoint::Parse and builds the window
+    // spanned by them. Errors raised by TGeoPoint::Parse propagate to the caller.
+    TGeoWindow TGeoWindow::ParseFromCornersPoints(TStringBuf leftCornerStr, TStringBuf rightCornerStr, TStringBuf delimiter) {
+        const auto firstCorner = TGeoPoint::Parse(leftCornerStr, delimiter);
+        const auto secondCorner = TGeoPoint::Parse(rightCornerStr, delimiter);
+        return TGeoWindow{firstCorner, secondCorner};
+    }
+
+    // Non-throwing variant of ParseFromCornersPoints: returns an empty TMaybe
+    // when either corner fails to parse.
+    TMaybe<TGeoWindow> TGeoWindow::TryParseFromCornersPoints(TStringBuf leftCornerStr, TStringBuf rightCornerStr, TStringBuf delimiter) {
+        const auto firstCorner = TGeoPoint::TryParse(leftCornerStr, delimiter);
+        const auto secondCorner = TGeoPoint::TryParse(rightCornerStr, delimiter);
+
+        if (!firstCorner) {
+            return {};
+        }
+        if (!secondCorner) {
+            return {};
+        }
+        return TGeoWindow{*firstCorner, *secondCorner};
+    }
+
+    // Parses a center point (ll) and a size (spn) and builds the window from
+    // them. Errors raised by TGeoPoint::Parse / TSize::Parse propagate to the caller.
+    TGeoWindow TGeoWindow::ParseFromLlAndSpn(TStringBuf llStr, TStringBuf spnStr, TStringBuf delimiter) {
+        const TGeoPoint center = TGeoPoint::Parse(llStr, delimiter);
+        const TSize span = TSize::Parse(spnStr, delimiter);
+        return TGeoWindow{center, span};
+    }
+
+    // Non-throwing variant of ParseFromLlAndSpn: returns an empty TMaybe when
+    // either the center or the size fails to parse.
+    TMaybe<TGeoWindow> TGeoWindow::TryParseFromLlAndSpn(TStringBuf llStr, TStringBuf spnStr, TStringBuf delimiter) {
+        const auto center = TGeoPoint::TryParse(llStr, delimiter);
+        const auto span = TSize::TryParse(spnStr, delimiter);
+
+        if (!center) {
+            return {};
+        }
+        if (!span) {
+            return {};
+        }
+        return TGeoWindow{*center, *span};
+    }
+ /**
+ * TMercatorWindow
+ */
+
+ // Default constructor: both half-extents are set to NaN. Center_ is not
+ // listed in the initializer list and is initialized by its own defaults.
+ TMercatorWindow::TMercatorWindow() noexcept
+ : HalfWidth_{std::numeric_limits<double>::quiet_NaN()}
+ , HalfHeight_{std::numeric_limits<double>::quiet_NaN()}
+ {
+ }
+
+ // Builds a window from its center and full size; the size is stored as half-extents.
+ TMercatorWindow::TMercatorWindow(const TMercatorPoint& center, const TSize& size) noexcept
+ : Center_{center}
+ , HalfWidth_{size.GetWidth() / 2}
+ , HalfHeight_{size.GetHeight() / 2}
+ {
+ }
+
+ // Builds a window from two points given in any order: the center is their
+ // midpoint and the half-extents are half the absolute coordinate differences.
+ TMercatorWindow::TMercatorWindow(const TMercatorPoint& firstPoint, const TMercatorPoint& secondPoint) noexcept
+ : Center_{GetMiddlePoint(firstPoint, secondPoint)}
+ , HalfWidth_{Abs(secondPoint.X() - firstPoint.X()) / 2}
+ , HalfHeight_{Abs(secondPoint.Y() - firstPoint.Y()) / 2}
+ {
+ }
+
+    // True when pt lies inside the window or on its border (all comparisons are inclusive).
+    bool TMercatorWindow::Contains(const TMercatorPoint& pt) const noexcept {
+        const bool xInside = (Center_.X() - HalfWidth_ <= pt.X()) && (pt.X() <= Center_.X() + HalfWidth_);
+        const bool yInside = (Center_.Y() - HalfHeight_ <= pt.Y()) && (pt.Y() <= Center_.Y() + HalfHeight_);
+        return xInside && yInside;
+    }
+
+ /**
+ * Conversion
+ */
+
+    // Converts a geo window to Mercator by converting its two corners.
+    TMercatorWindow LLToMercator(const TGeoWindow& window) {
+        const auto lowerLeft = LLToMercator(window.GetLowerLeftCorner());
+        const auto upperRight = LLToMercator(window.GetUpperRightCorner());
+        return TMercatorWindow{lowerLeft, upperRight};
+    }
+
+    // Converts a Mercator window to a geo window by converting its two corners.
+    TGeoWindow MercatorToLL(const TMercatorWindow& window) {
+        const auto lowerLeft = MercatorToLL(window.GetLowerLeftCorner());
+        const auto upperRight = MercatorToLL(window.GetUpperRightCorner());
+        return TGeoWindow{lowerLeft, upperRight};
+    }
+
+ /**
+ * Operators
+ */
+
+    /**
+     * Intersection of two windows computed on their corner coordinates.
+     * Returns an empty TMaybe when the windows are separated along either axis;
+     * windows that merely touch yield a degenerate (zero-width or zero-height) window.
+     */
+    TMaybe<TGeoWindow> Intersection(const TGeoWindow& lhs, const TGeoWindow& rhs) {
+        const TGeoPoint& lhsLow = lhs.GetLowerLeftCorner();
+        const TGeoPoint& rhsLow = rhs.GetLowerLeftCorner();
+        const TGeoPoint& lhsHigh = lhs.GetUpperRightCorner();
+        const TGeoPoint& rhsHigh = rhs.GetUpperRightCorner();
+
+        const double west = Max(lhsLow.Lon(), rhsLow.Lon());
+        const double east = Min(lhsHigh.Lon(), rhsHigh.Lon());
+        const double south = Max(lhsLow.Lat(), rhsLow.Lat());
+        const double north = Min(lhsHigh.Lat(), rhsHigh.Lat());
+
+        if (west > east || south > north) {
+            return {};
+        }
+        return TGeoWindow(TGeoPoint(west, south), TGeoPoint(east, north));
+    }
+
+    // Intersection of optional windows: empty as soon as either operand is empty.
+    TMaybe<TGeoWindow> Intersection(const TMaybe<TGeoWindow>& lhs, const TMaybe<TGeoWindow>& rhs) {
+        if (lhs.Defined() && rhs.Defined()) {
+            return Intersection(*lhs, *rhs);
+        }
+        return {};
+    }
+
+    // Smallest window, in terms of corner coordinates, that covers both lhs and rhs.
+    TGeoWindow Union(const TGeoWindow& lhs, const TGeoWindow& rhs) {
+        const TGeoPoint& lhsLow = lhs.GetLowerLeftCorner();
+        const TGeoPoint& rhsLow = rhs.GetLowerLeftCorner();
+        const TGeoPoint& lhsHigh = lhs.GetUpperRightCorner();
+        const TGeoPoint& rhsHigh = rhs.GetUpperRightCorner();
+
+        const double west = Min(lhsLow.Lon(), rhsLow.Lon());
+        const double east = Max(lhsHigh.Lon(), rhsHigh.Lon());
+        const double south = Min(lhsLow.Lat(), rhsLow.Lat());
+        const double north = Max(lhsHigh.Lat(), rhsHigh.Lat());
+
+        return TGeoWindow{TGeoPoint{west, south}, TGeoPoint{east, north}};
+    }
+
+    // Union of optional windows: an empty operand is ignored, so the result is
+    // empty only when both operands are empty.
+    TMaybe<TGeoWindow> Union(const TMaybe<TGeoWindow>& lhs, const TMaybe<TGeoWindow>& rhs) {
+        if (lhs.Defined() && rhs.Defined()) {
+            return Union(*lhs, *rhs);
+        }
+        if (lhs.Defined()) {
+            return lhs;
+        }
+        return rhs;
+    }
+
+    // An empty optional window contains nothing; otherwise defers to TGeoWindow::Contains.
+    bool Contains(const TMaybe<TGeoWindow>& window, const TGeoPoint& point) {
+        return window.Defined() && window.GetRef().Contains(point);
+    }
+
+    /**
+     * True when the windows overlap along both axes. Windows that only touch
+     * along an edge or at a corner are treated as separated on that axis, so
+     * they do not intersect.
+     */
+    bool Intersects(const TGeoWindow& lhs, const TGeoWindow& rhs) {
+        const TGeoPoint& lhsLow = lhs.GetLowerLeftCorner();
+        const TGeoPoint& rhsLow = rhs.GetLowerLeftCorner();
+        const TGeoPoint& lhsHigh = lhs.GetUpperRightCorner();
+        const TGeoPoint& rhsHigh = rhs.GetUpperRightCorner();
+
+        // The negated form is kept deliberately so comparisons involving NaN behave as before.
+        const bool separatedByLon = lhsHigh.Lon() <= rhsLow.Lon() || rhsHigh.Lon() <= lhsLow.Lon();
+        const bool separatedByLat = lhsHigh.Lat() <= rhsLow.Lat() || rhsHigh.Lat() <= lhsLow.Lat();
+
+        return !separatedByLon && !separatedByLat;
+    }
+
+    // Optional windows intersect only when both are defined and their values intersect.
+    bool Intersects(const TMaybe<TGeoWindow>& lhs, const TMaybe<TGeoWindow>& rhs) {
+        return lhs.Defined() && rhs.Defined() && Intersects(*lhs, *rhs);
+    }
+} // namespace NGeo
diff --git a/library/cpp/geo/window.h b/library/cpp/geo/window.h
new file mode 100644
index 0000000000..1205d8351b
--- /dev/null
+++ b/library/cpp/geo/window.h
@@ -0,0 +1,264 @@
+#pragma once
+
+#include "point.h"
+#include "size.h"
+#include <util/generic/string.h>
+#include <util/generic/yexception.h>
+#include <util/string/cast.h>
+#include <util/generic/maybe.h>
+
+#include <algorithm>
+
+namespace NGeo {
+ class TGeoWindow {
+ public:
+ TGeoWindow() noexcept
+
+ = default;
+
+ TGeoWindow(const TGeoPoint& center, const TSize& size) noexcept
+ : Center_(center)
+ , Size_(size)
+ {
+ CalcCorners();
+ }
+
+ TGeoWindow(const TGeoPoint& firstPoint, const TGeoPoint& secondPoint) noexcept
+ : LowerLeftCorner_{std::min(firstPoint.Lon(), secondPoint.Lon()),
+ std::min(firstPoint.Lat(), secondPoint.Lat())}
+ , UpperRightCorner_{std::max(firstPoint.Lon(), secondPoint.Lon()),
+ std::max(firstPoint.Lat(), secondPoint.Lat())}
+ {
+ CalcCenterAndSpan();
+ }
+
+ const TGeoPoint& GetCenter() const noexcept {
+ return Center_;
+ }
+
+ void SetCenter(const TGeoPoint& newCenter) {
+ Center_ = newCenter;
+ CalcCorners();
+ }
+
+ const TSize& GetSize() const noexcept {
+ return Size_;
+ }
+
+ void SetSize(const TSize& newSize) {
+ Size_ = newSize;
+ CalcCorners();
+ }
+
+ const TGeoPoint& GetLowerLeftCorner() const noexcept {
+ return LowerLeftCorner_;
+ }
+
+ const TGeoPoint& GetUpperRightCorner() const noexcept {
+ return UpperRightCorner_;
+ }
+
+ void swap(TGeoWindow& o) noexcept {
+ Center_.swap(o.Center_);
+ Size_.swap(o.Size_);
+ LowerLeftCorner_.swap(o.LowerLeftCorner_);
+ UpperRightCorner_.swap(o.UpperRightCorner_);
+ }
+
+ bool IsValid() const noexcept {
+ return Center_.IsValid() && Size_.IsValid();
+ }
+
+ bool Contains(const TGeoPoint&) const;
+
+ bool Contains(const TGeoWindow& w) const {
+ return Contains(w.LowerLeftCorner_) && Contains(w.UpperRightCorner_);
+ }
+
+ void Stretch(double multiplier) {
+ Size_.Stretch(multiplier);
+ CalcCorners();
+ }
+
+ void Inflate(double additionX, double additionY) {
+ Size_.Inflate(additionX * 2, additionY * 2);
+ CalcCorners();
+ }
+
+ void Inflate(double addition) {
+ Inflate(addition, addition);
+ }
+
+ bool operator!() const {
+ return !IsValid();
+ }
+
+ double Diameter() const;
+
+ double Area() const {
+ return Size_.GetHeight() * Size_.GetWidth();
+ }
+
+ double Distance(const TGeoWindow&) const;
+
+ double GetApproxDistance(const TPointLL& point) const;
+
+ /**
+ * try to parse TGeoWindow from center and span
+ * return parsed TGeoWindow on success, otherwise throw exception
+ */
+ static TGeoWindow ParseFromLlAndSpn(TStringBuf llStr, TStringBuf spnStr, TStringBuf delimiter = TStringBuf(","));
+
+ /**
+ * try to parse TGeoWindow from two corners
+ * return parsed TGeoWindow on success, otherwise throw exception
+ */
+ static TGeoWindow ParseFromCornersPoints(TStringBuf leftCornerStr, TStringBuf rightCornerStr, TStringBuf delimiter = TStringBuf(","));
+
+ /**
+ * try to parse TGeoWindow from center and span
+ * return TMaybe of parsed TGeoWindow on success, otherwise return empty TMaybe
+ */
+ static TMaybe<TGeoWindow> TryParseFromLlAndSpn(TStringBuf llStr, TStringBuf spnStr, TStringBuf delimiter = TStringBuf(","));
+
+ /**
+ * try to parse TGeoWindow from two corners
+ * return TMaybe of parsed TGeoWindow on success, otherwise return empty TMaybe
+ */
+ static TMaybe<TGeoWindow> TryParseFromCornersPoints(TStringBuf leftCornerStr, TStringBuf rightCornerStr, TStringBuf delimiter = TStringBuf(","));
+
+ private:
+ TGeoPoint Center_;
+ TSize Size_;
+ TGeoPoint LowerLeftCorner_;
+ TGeoPoint UpperRightCorner_;
+
+ void CalcCorners();
+ void CalcCenterAndSpan();
+ };
+
+ inline bool operator==(const TGeoWindow& lhs, const TGeoWindow& rhs) {
+ return lhs.GetCenter() == rhs.GetCenter() && lhs.GetSize() == rhs.GetSize();
+ }
+
+ inline bool operator!=(const TGeoWindow& p1, const TGeoWindow& p2) {
+ return !(p1 == p2);
+ }
+
+ /**
+ * \class TMercatorWindow
+ *
+ * Represents a window in EPSG:3395 projection
+ * (WGS 84 / World Mercator)
+ */
+ class TMercatorWindow {
+ public:
+ TMercatorWindow() noexcept;
+ TMercatorWindow(const TMercatorPoint& center, const TSize& size) noexcept;
+ TMercatorWindow(const TMercatorPoint& firstPoint, const TMercatorPoint& secondPoint) noexcept;
+
+ const TMercatorPoint& GetCenter() const noexcept {
+ return Center_;
+ }
+
+ TSize GetHalfSize() const noexcept {
+ return {HalfWidth_, HalfHeight_};
+ }
+
+ TSize GetSize() const noexcept {
+ return {GetWidth(), GetHeight()};
+ }
+
+ double GetWidth() const noexcept {
+ return HalfWidth_ * 2;
+ }
+
+ double GetHeight() const noexcept {
+ return HalfHeight_ * 2;
+ }
+
+ TMercatorPoint GetLowerLeftCorner() const noexcept {
+ return TMercatorPoint{Center_.X() - HalfWidth_, Center_.Y() - HalfHeight_};
+ }
+
+ TMercatorPoint GetUpperRightCorner() const noexcept {
+ return TMercatorPoint{Center_.X() + HalfWidth_, Center_.Y() + HalfHeight_};
+ }
+
+ bool Contains(const TMercatorPoint& pt) const noexcept;
+
+ bool Contains(const TMercatorWindow& w) const {
+ return Contains(w.GetLowerLeftCorner()) && Contains(w.GetUpperRightCorner());
+ }
+
+ void Stretch(double multiplier) {
+ HalfWidth_ *= multiplier;
+ HalfHeight_ *= multiplier;
+ }
+
+ void Inflate(double additionX, double additionY) {
+ HalfWidth_ += additionX;
+ HalfHeight_ += additionY;
+ }
+
+ void Inflate(double addition) {
+ Inflate(addition, addition);
+ }
+
+ double Area() const {
+ return GetHeight() * GetWidth();
+ }
+
+ private:
+ bool IsDefined() const {
+ return Center_.IsDefined() && !std::isnan(HalfWidth_) && !std::isnan(HalfHeight_);
+ }
+
+ private:
+ TMercatorPoint Center_;
+ double HalfWidth_;
+ double HalfHeight_;
+ };
+
+ inline bool operator==(const TMercatorWindow& lhs, const TMercatorWindow& rhs) {
+ return lhs.GetCenter() == rhs.GetCenter() && lhs.GetHalfSize() == rhs.GetHalfSize();
+ }
+
+ inline bool operator!=(const TMercatorWindow& p1, const TMercatorWindow& p2) {
+ return !(p1 == p2);
+ }
+
+ /**
+ * Typedefs
+ * TODO(sobols@): remove
+ */
+
+ using TWindowLL = TGeoWindow;
+
+ /**
+ * Conversion
+ */
+
+ TMercatorWindow LLToMercator(const TGeoWindow&);
+ TGeoWindow MercatorToLL(const TMercatorWindow&);
+
+ /**
+ * Utility functions
+ */
+
+ bool Contains(const TMaybe<TGeoWindow>& window, const TGeoPoint& point);
+
+ TMaybe<TGeoWindow> Union(const TMaybe<TGeoWindow>& lhs, const TMaybe<TGeoWindow>& rhs);
+ TGeoWindow Union(const TGeoWindow& lhs, const TGeoWindow& rhs);
+
+ TMaybe<TGeoWindow> Intersection(const TMaybe<TGeoWindow>& lhs, const TMaybe<TGeoWindow>& rhs);
+ TMaybe<TGeoWindow> Intersection(const TGeoWindow& lhs, const TGeoWindow& rhs);
+
+ bool Intersects(const TGeoWindow& lhs, const TGeoWindow& rhs);
+ bool Intersects(const TMaybe<TGeoWindow>& lhs, const TMaybe<TGeoWindow>& rhs);
+} // namespace NGeo
+
+template <>
+inline void Out<NGeo::TGeoWindow>(IOutputStream& o, const NGeo::TGeoWindow& obj) {
+ o << '{' << obj.GetCenter() << ", " << obj.GetSize() << ", " << obj.GetLowerLeftCorner() << ", " << obj.GetUpperRightCorner() << "}";
+}
diff --git a/library/cpp/geo/ya.make b/library/cpp/geo/ya.make
new file mode 100644
index 0000000000..1d36003c5c
--- /dev/null
+++ b/library/cpp/geo/ya.make
@@ -0,0 +1,19 @@
+LIBRARY()
+
+SRCS(
+ bbox.cpp
+ geo.cpp
+ point.cpp
+ polygon.cpp
+ load_save_helper.cpp
+ size.cpp
+ util.cpp
+ window.cpp
+)
+
+END()
+
+RECURSE_FOR_TESTS(
+ ut
+ style
+ )
diff --git a/library/cpp/geobase/CMakeLists.darwin-x86_64.txt b/library/cpp/geobase/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..b316e54e8a
--- /dev/null
+++ b/library/cpp/geobase/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,30 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_enum_parser_bin
+ TOOL_enum_parser_dependency
+ tools/enum_parser/enum_parser
+ enum_parser
+)
+
+add_library(library-cpp-geobase)
+target_link_libraries(library-cpp-geobase PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ geobase-library
+ tools-enum_parser-enum_serialization_runtime
+)
+target_sources(library-cpp-geobase PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/geobase/geobase.cpp
+)
+generate_enum_serilization(library-cpp-geobase
+ ${CMAKE_SOURCE_DIR}/geobase/include/structs.hpp
+ INCLUDE_HEADERS
+ geobase/include/structs.hpp
+)
diff --git a/library/cpp/geobase/CMakeLists.linux-aarch64.txt b/library/cpp/geobase/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..ab3962970d
--- /dev/null
+++ b/library/cpp/geobase/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,31 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_enum_parser_bin
+ TOOL_enum_parser_dependency
+ tools/enum_parser/enum_parser
+ enum_parser
+)
+
+add_library(library-cpp-geobase)
+target_link_libraries(library-cpp-geobase PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ geobase-library
+ tools-enum_parser-enum_serialization_runtime
+)
+target_sources(library-cpp-geobase PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/geobase/geobase.cpp
+)
+generate_enum_serilization(library-cpp-geobase
+ ${CMAKE_SOURCE_DIR}/geobase/include/structs.hpp
+ INCLUDE_HEADERS
+ geobase/include/structs.hpp
+)
diff --git a/library/cpp/geobase/CMakeLists.linux-x86_64.txt b/library/cpp/geobase/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..ab3962970d
--- /dev/null
+++ b/library/cpp/geobase/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,31 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_enum_parser_bin
+ TOOL_enum_parser_dependency
+ tools/enum_parser/enum_parser
+ enum_parser
+)
+
+add_library(library-cpp-geobase)
+target_link_libraries(library-cpp-geobase PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ geobase-library
+ tools-enum_parser-enum_serialization_runtime
+)
+target_sources(library-cpp-geobase PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/geobase/geobase.cpp
+)
+generate_enum_serilization(library-cpp-geobase
+ ${CMAKE_SOURCE_DIR}/geobase/include/structs.hpp
+ INCLUDE_HEADERS
+ geobase/include/structs.hpp
+)
diff --git a/library/cpp/geobase/CMakeLists.txt b/library/cpp/geobase/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/library/cpp/geobase/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/library/cpp/geobase/CMakeLists.windows-x86_64.txt b/library/cpp/geobase/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..b316e54e8a
--- /dev/null
+++ b/library/cpp/geobase/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,30 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_enum_parser_bin
+ TOOL_enum_parser_dependency
+ tools/enum_parser/enum_parser
+ enum_parser
+)
+
+add_library(library-cpp-geobase)
+target_link_libraries(library-cpp-geobase PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ geobase-library
+ tools-enum_parser-enum_serialization_runtime
+)
+target_sources(library-cpp-geobase PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/geobase/geobase.cpp
+)
+generate_enum_serilization(library-cpp-geobase
+ ${CMAKE_SOURCE_DIR}/geobase/include/structs.hpp
+ INCLUDE_HEADERS
+ geobase/include/structs.hpp
+)
diff --git a/library/cpp/geobase/geobase.cpp b/library/cpp/geobase/geobase.cpp
new file mode 100644
index 0000000000..24086c67a9
--- /dev/null
+++ b/library/cpp/geobase/geobase.cpp
@@ -0,0 +1,3 @@
+#include <library/cpp/geobase/lookup.hpp>
+#include <library/cpp/geobase/timezone_getter.hpp>
+#include <library/cpp/geobase/service_getter.hpp>
diff --git a/library/cpp/geobase/lookup.hpp b/library/cpp/geobase/lookup.hpp
new file mode 100644
index 0000000000..f663750ab2
--- /dev/null
+++ b/library/cpp/geobase/lookup.hpp
@@ -0,0 +1,44 @@
+#pragma once
+
+#include <geobase/include/lookup.hpp>
+#include <geobase/include/lookup_wrapper.hpp>
+#include <geobase/include/structs.hpp>
+
+namespace NGeobase {
+ using TInitTraits = NImpl::TLookup::TInitTraits;
+
+ class TLookup: public NImpl::TLookup {
+ public:
+ using parent = NImpl::TLookup;
+
+ explicit TLookup(const std::string& datafile, const TInitTraits traits = {})
+ : parent(datafile, traits)
+ {
+ }
+ explicit TLookup(const TInitTraits traits)
+ : parent(traits)
+ {
+ }
+ explicit TLookup(const void* pData, size_t len)
+ : parent(pData, len)
+ {
+ }
+
+ ~TLookup() {
+ }
+ };
+
+ using TRegion = NImpl::TRegion;
+ using TGeolocation = NImpl::TGeolocation;
+ using TLinguistics = NImpl::TLinguistics;
+ using TGeoPoint = NImpl::TGeoPoint;
+
+ using TLookupWrapper = NImpl::TLookupWrapper;
+
+ using TId = NImpl::Id;
+ using TIdsList = NImpl::IdsList;
+ using TRegionsList = NImpl::TRegionsList;
+
+ using TIpBasicTraits = NImpl::TIpBasicTraits;
+ using TIpTraits = NImpl::TIpTraits;
+}
diff --git a/library/cpp/geobase/service_getter.hpp b/library/cpp/geobase/service_getter.hpp
new file mode 100644
index 0000000000..e088081706
--- /dev/null
+++ b/library/cpp/geobase/service_getter.hpp
@@ -0,0 +1,7 @@
+#pragma once
+
+#include <geobase/include/service_getter.hpp>
+
+namespace NGeobase {
+ using TServiceGetter = NImpl::TServiceGetter;
+}
diff --git a/library/cpp/geobase/timezone_getter.hpp b/library/cpp/geobase/timezone_getter.hpp
new file mode 100644
index 0000000000..5749f1e3d6
--- /dev/null
+++ b/library/cpp/geobase/timezone_getter.hpp
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <geobase/include/timezone_getter.hpp>
+#include <geobase/include/structs.hpp>
+
+namespace NGeobase {
+ using TTimezone = NImpl::TTimezone;
+ using TTimezoneGetter = NImpl::TTimezoneGetter;
+}
diff --git a/library/cpp/geobase/ya.make b/library/cpp/geobase/ya.make
new file mode 100644
index 0000000000..4a73974903
--- /dev/null
+++ b/library/cpp/geobase/ya.make
@@ -0,0 +1,13 @@
+LIBRARY()
+
+SRCS(
+ library/cpp/geobase/geobase.cpp
+)
+
+PEERDIR(
+ geobase/library
+)
+
+GENERATE_ENUM_SERIALIZATION(geobase/include/structs.hpp)
+
+END()
diff --git a/library/cpp/geohash/CMakeLists.darwin-x86_64.txt b/library/cpp/geohash/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..dfcb278a1f
--- /dev/null
+++ b/library/cpp/geohash/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,32 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_enum_parser_bin
+ TOOL_enum_parser_dependency
+ tools/enum_parser/enum_parser
+ enum_parser
+)
+
+add_library(library-cpp-geohash)
+target_link_libraries(library-cpp-geohash PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-geo
+ tools-enum_parser-enum_serialization_runtime
+)
+target_sources(library-cpp-geohash PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/geohash/geohash.cpp
+)
+generate_enum_serilization(library-cpp-geohash
+ ${CMAKE_SOURCE_DIR}/library/cpp/geohash/direction.h
+ GEN_HEADER
+ ${CMAKE_BINARY_DIR}/library/cpp/geohash/direction.h_serialized.h
+ INCLUDE_HEADERS
+ library/cpp/geohash/direction.h
+)
diff --git a/library/cpp/geohash/CMakeLists.linux-aarch64.txt b/library/cpp/geohash/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..a907311df0
--- /dev/null
+++ b/library/cpp/geohash/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,33 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_enum_parser_bin
+ TOOL_enum_parser_dependency
+ tools/enum_parser/enum_parser
+ enum_parser
+)
+
+add_library(library-cpp-geohash)
+target_link_libraries(library-cpp-geohash PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-geo
+ tools-enum_parser-enum_serialization_runtime
+)
+target_sources(library-cpp-geohash PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/geohash/geohash.cpp
+)
+generate_enum_serilization(library-cpp-geohash
+ ${CMAKE_SOURCE_DIR}/library/cpp/geohash/direction.h
+ GEN_HEADER
+ ${CMAKE_BINARY_DIR}/library/cpp/geohash/direction.h_serialized.h
+ INCLUDE_HEADERS
+ library/cpp/geohash/direction.h
+)
diff --git a/library/cpp/geohash/CMakeLists.linux-x86_64.txt b/library/cpp/geohash/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..a907311df0
--- /dev/null
+++ b/library/cpp/geohash/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,33 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_enum_parser_bin
+ TOOL_enum_parser_dependency
+ tools/enum_parser/enum_parser
+ enum_parser
+)
+
+add_library(library-cpp-geohash)
+target_link_libraries(library-cpp-geohash PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-geo
+ tools-enum_parser-enum_serialization_runtime
+)
+target_sources(library-cpp-geohash PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/geohash/geohash.cpp
+)
+generate_enum_serilization(library-cpp-geohash
+ ${CMAKE_SOURCE_DIR}/library/cpp/geohash/direction.h
+ GEN_HEADER
+ ${CMAKE_BINARY_DIR}/library/cpp/geohash/direction.h_serialized.h
+ INCLUDE_HEADERS
+ library/cpp/geohash/direction.h
+)
diff --git a/library/cpp/geohash/CMakeLists.txt b/library/cpp/geohash/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/library/cpp/geohash/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/library/cpp/geohash/CMakeLists.windows-x86_64.txt b/library/cpp/geohash/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..dfcb278a1f
--- /dev/null
+++ b/library/cpp/geohash/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,32 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_enum_parser_bin
+ TOOL_enum_parser_dependency
+ tools/enum_parser/enum_parser
+ enum_parser
+)
+
+add_library(library-cpp-geohash)
+target_link_libraries(library-cpp-geohash PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-geo
+ tools-enum_parser-enum_serialization_runtime
+)
+target_sources(library-cpp-geohash PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/geohash/geohash.cpp
+)
+generate_enum_serilization(library-cpp-geohash
+ ${CMAKE_SOURCE_DIR}/library/cpp/geohash/direction.h
+ GEN_HEADER
+ ${CMAKE_BINARY_DIR}/library/cpp/geohash/direction.h_serialized.h
+ INCLUDE_HEADERS
+ library/cpp/geohash/direction.h
+)
diff --git a/library/cpp/geohash/direction.h b/library/cpp/geohash/direction.h
new file mode 100644
index 0000000000..88a3e6061d
--- /dev/null
+++ b/library/cpp/geohash/direction.h
@@ -0,0 +1,14 @@
+#pragma once
+
+namespace NGeoHash {
+ enum EDirection {
+ NORTH = 0,
+ NORTH_EAST,
+ EAST,
+ SOUTH_EAST,
+ SOUTH,
+ SOUTH_WEST,
+ WEST,
+ NORTH_WEST,
+ };
+}
diff --git a/library/cpp/geohash/geohash.cpp b/library/cpp/geohash/geohash.cpp
new file mode 100644
index 0000000000..6c6d65acab
--- /dev/null
+++ b/library/cpp/geohash/geohash.cpp
@@ -0,0 +1,413 @@
+#include "geohash.h"
+
+#include <util/generic/xrange.h>
+
+namespace {
+ using TNeighbourDescriptors = NGeoHash::TNeighbours<TMaybe<NGeoHash::TGeoHashDescriptor>>;
+ const auto directions = GetEnumAllValues<NGeoHash::EDirection>();
+
+ const auto doubleEps = std::numeric_limits<double>::epsilon();
+
+ const NGeoHash::TBoundingBoxLL& GetGlobalBBox() {
+ static const NGeoHash::TBoundingBoxLL globalLimits({-180, -90}, {180, 90});
+ return globalLimits;
+ }
+
+ const TStringBuf base32EncodeTable = "0123456789bcdefghjkmnpqrstuvwxyz";
+
+ const ui64 base32DecodeMask = 0x1F;
+ constexpr int base32DecodeTableSize = 128;
+
+ using TBase32DecodeTable = std::array<TMaybe<i8>, base32DecodeTableSize>;
+
+ TBase32DecodeTable MakeBase32DecodeTable() {
+ TBase32DecodeTable result;
+ result.fill(Nothing());
+ for (auto i : xrange(base32EncodeTable.size())) {
+ result[base32EncodeTable[i]] = i;
+ }
+ return result;
+ }
+
+ const TBase32DecodeTable base32DecodeTable = MakeBase32DecodeTable();
+}
+
+namespace NGeoHash {
+ static const ui8 maxSteps = 62;
+ static const ui8 maxPrecision = TGeoHashDescriptor::StepsToPrecision(maxSteps); // 12
+
+ static const TNeighbours<std::pair<i8, i8>> neighborBitMoves = {
+ {1, 0}, // NORTH
+ {1, 1},
+ {0, 1},
+ {-1, 1},
+ {-1, 0},
+ {-1, -1},
+ {0, -1},
+ {1, -1},
+ };
+
+ ui8 TGeoHashDescriptor::StepsToPrecision(ui8 steps) {
+ return steps / StepsPerPrecisionUnit;
+ }
+
+ ui8 TGeoHashDescriptor::PrecisionToSteps(ui8 precision) {
+ return precision * StepsPerPrecisionUnit;
+ }
+
+ /* Steps interleave starting from lon so for 5 steps 3 are lon-steps and 2 are lat-steps.
+ * Thus there are ceil(step/2) lon-steps and floor(step/2) lat-steps */
+ std::pair<ui8, ui8> TGeoHashDescriptor::LatLonSteps() const {
+ return std::make_pair<ui8, ui8>(Steps / 2, (Steps + 1) / 2);
+ }
+
+ struct TMagicNumber {
+ ui64 Mask;
+ ui8 Shift;
+ };
+
+ /* Interleave lower bits of x and y, so the bits of x
+ * are in the even positions and bits from y in the odd.
+ * e.g. Interleave64(0b101, 0b110) => 0b111001
+ * From: https://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN
+ */
+ ui64 TGeoHashDescriptor::Interleave64(ui32 x, ui32 y) {
+ // attention: magic numbers
+ constexpr TMagicNumber mortonMagicNumbers[] = {
+ {0x0000FFFF0000FFFF, 16},
+ {0x00FF00FF00FF00FF, 8},
+ {0x0F0F0F0F0F0F0F0F, 4},
+ {0x3333333333333333, 2},
+ {0x5555555555555555, 1}};
+
+ ui64 x64 = x;
+ ui64 y64 = y;
+
+ for (const auto& magicNumber : mortonMagicNumbers) {
+ x64 = (x64 | (x64 << magicNumber.Shift)) & magicNumber.Mask;
+ y64 = (y64 | (y64 << magicNumber.Shift)) & magicNumber.Mask;
+ }
+ return x64 | (y64 << 1);
+ }
+
+ /* Reverse the interleave process
+ * Deinterleave64(0b111001) => {0b101, 0b110}
+ * derived from http://stackoverflow.com/questions/4909263 */
+ std::pair<ui32, ui32> TGeoHashDescriptor::Deinterleave64(ui64 z) {
+ constexpr TMagicNumber demortonMagicNumbers[] = {
+ {0x5555555555555555ULL, 0},
+ {0x3333333333333333ULL, 1},
+ {0x0F0F0F0F0F0F0F0FULL, 2},
+ {0x00FF00FF00FF00FFULL, 4},
+ {0x0000FFFF0000FFFFULL, 8},
+ {0x00000000FFFFFFFFULL, 16}};
+
+ ui64 x = z;
+ ui64 y = z >> 1;
+
+ for (const auto& magicNumber : demortonMagicNumbers) {
+ x = (x | (x >> magicNumber.Shift)) & magicNumber.Mask;
+ y = (y | (y >> magicNumber.Shift)) & magicNumber.Mask;
+ }
+
+ return std::make_pair(x, y);
+ }
+
+ std::pair<ui32, ui32> TGeoHashDescriptor::LatLonBits() const {
+ auto deinterleaved = Deinterleave64(Bits);
+
+ if (Steps % 2) {
+ DoSwap(deinterleaved.first, deinterleaved.second);
+ }
+ return deinterleaved;
+ }
+
+ void TGeoHashDescriptor::SetLatLonBits(ui32 latBits, ui32 lonBits) {
+ if (Steps % 2) {
+ Bits = Interleave64(lonBits, latBits);
+ } else {
+ Bits = Interleave64(latBits, lonBits);
+ }
+ }
+
+ void TGeoHashDescriptor::InitFromLatLon(double latitude, double longitude, const TBoundingBoxLL& limits, ui8 steps) {
+     Steps = steps; // stored first: LatLonSteps() and SetLatLonBits() below read it
+     if (Steps > maxSteps) {
+         ythrow yexception() << "Invalid steps: available values: 0.." << ::ToString(maxSteps);
+     }
+
+     if (limits.Width() < doubleEps || limits.Height() < doubleEps) {
+         ythrow yexception() << "Invalid limits: min/max for one of coordinates are equal";
+     }
+
+     if (latitude < limits.GetMinY() || latitude > limits.GetMaxY() || longitude < limits.GetMinX() || longitude > limits.GetMaxX()) {
+         ythrow yexception() << "Invalid point (" << latitude << ", " << longitude << "): outside of limits";
+     }
+
+     double lat01 = (latitude - limits.GetMinY()) / limits.Height(); // offset from the lower edge as a fraction of the box height
+     double lon01 = (longitude - limits.GetMinX()) / limits.Width(); // offset from the left edge as a fraction of the box width
+
+     auto llSteps = LatLonSteps(); // {lat steps, lon steps}
+
+     /* convert to fixed point based on the step size; shift in 64 bits because an axis may take 31 steps and `1 << 31` overflows int */
+     lat01 *= (1ull << llSteps.first);
+     lon01 *= (1ull << llSteps.second);
+
+     /* If lon_steps > lat_steps, last bit is lon-bit, otherwise last bit is lat-bit */
+     SetLatLonBits(lat01, lon01); // NOTE(review): a coordinate equal to the upper limit appears to scale to 2^steps, one past the last cell index - confirm this is intended
+ }
+
+ TGeoHashDescriptor::TGeoHashDescriptor(double latitude, double longitude, const TBoundingBoxLL& limits, ui8 steps) {
+ InitFromLatLon(latitude, longitude, limits, steps);
+ }
+
+ TGeoHashDescriptor::TGeoHashDescriptor(double latitude, double longitude, ui8 steps) {
+ InitFromLatLon(latitude, longitude, GetGlobalBBox(), steps);
+ }
+
+ TGeoHashDescriptor::TGeoHashDescriptor(const NGeo::TPointLL& point, const TBoundingBoxLL& limits, ui8 steps) {
+ InitFromLatLon(point.Lat(), point.Lon(), limits, steps);
+ }
+
+ TGeoHashDescriptor::TGeoHashDescriptor(const NGeo::TPointLL& point, ui8 steps) {
+ InitFromLatLon(point.Lat(), point.Lon(), GetGlobalBBox(), steps);
+ }
+
+ TGeoHashDescriptor::TGeoHashDescriptor(const TString& hashString) {
+     if (hashString.size() > maxPrecision) {
+         ythrow yexception() << "hashString is too long: max length is " << ::ToString(maxPrecision);
+     }
+
+     Bits = 0;
+     for (auto c : hashString) {
+         Bits <<= StepsPerPrecisionUnit; // make room for the next base32 digit
+         Y_ENSURE(static_cast<unsigned char>(c) < base32DecodeTableSize); // bounds the table index whether plain char is signed or unsigned
+         const auto decodedChar = base32DecodeTable[static_cast<unsigned char>(c)];
+         Y_ENSURE(decodedChar.Defined()); // character is not part of the geohash base32 alphabet
+         Bits |= decodedChar.GetRef();
+     }
+
+     Steps = PrecisionToSteps(hashString.size());
+ }
+
+ ui64 TGeoHashDescriptor::GetBits() const {
+ return Bits;
+ }
+
+ ui8 TGeoHashDescriptor::GetSteps() const {
+ return Steps;
+ }
+
+ TString TGeoHashDescriptor::ToString() const {
+ auto precision = StepsToPrecision(Steps);
+
+ TStringStream stream;
+
+ auto bits = Bits;
+ auto activeSteps = PrecisionToSteps(precision);
+
+ bits >>= (Steps - activeSteps);
+ for (auto i : xrange(precision)) {
+ auto ix = (bits >> (StepsPerPrecisionUnit * ((precision - i - 1)))) & base32DecodeMask;
+ stream << base32EncodeTable[ix];
+ }
+
+ return stream.Str();
+ }
+
+ TBoundingBoxLL TGeoHashDescriptor::ToBoundingBox(const TBoundingBoxLL& limits) const {
+ auto llBits = LatLonBits();
+ auto llSteps = LatLonSteps();
+
+ double latMultiplier = limits.Height() / (1ull << llSteps.first);
+ double lonMultiplier = limits.Width() / (1ull << llSteps.second);
+
+ return {
+ {
+ limits.GetMinX() + lonMultiplier * llBits.second,
+ limits.GetMinY() + latMultiplier * llBits.first,
+ },
+ {
+ limits.GetMinX() + lonMultiplier * (llBits.second + 1),
+ limits.GetMinY() + latMultiplier * (llBits.first + 1),
+ }};
+ }
+
+ TBoundingBoxLL TGeoHashDescriptor::ToBoundingBox() const {
+ return ToBoundingBox(GetGlobalBBox());
+ }
+
+ NGeo::TPointLL TGeoHashDescriptor::ToPoint(const TBoundingBoxLL& limits) const {
+ auto boundingBox = ToBoundingBox(limits);
+ return {
+ boundingBox.GetMinX() + boundingBox.Width() / 2,
+ boundingBox.GetMinY() + boundingBox.Height() / 2};
+ }
+
+ NGeo::TPointLL TGeoHashDescriptor::ToPoint() const {
+ return ToPoint(GetGlobalBBox());
+ }
+
+ TMaybe<TGeoHashDescriptor> TGeoHashDescriptor::GetNeighbour(EDirection direction) const {
+     TGeoHashDescriptor result(0, Steps); // same step count as this cell; bits are filled in below
+     auto llBits = LatLonBits();
+     auto llSteps = LatLonSteps();
+     std::pair<i8, i8> bitMove = neighborBitMoves[direction]; // {lat delta, lon delta} for the requested direction
+
+     auto newLatBits = llBits.first + bitMove.first;
+     auto newLonBits = llBits.second + bitMove.second;
+
+     // Overflow in lat means polar, so return Nothing
+     if (newLatBits >> llSteps.first != 0) {
+         return Nothing();
+     }
+
+     // Overflow in lon means 180-meridian, so just remove overflowed bits
+     newLonBits &= ((1ull << llSteps.second) - 1); // 64-bit mask: `(1 << 31) - 1` would overflow int at 31 lon steps
+     result.SetLatLonBits(newLatBits, newLonBits);
+     return result;
+ }
+
+ TNeighbourDescriptors TGeoHashDescriptor::GetNeighbours() const {
+     TNeighbourDescriptors result; // one slot per EDirection value
+     auto llBits = LatLonBits();
+     auto llSteps = LatLonSteps();
+     std::pair<i8, i8> bitMove; // {lat delta, lon delta} of the direction being processed
+
+     for (auto direction : directions) {
+         bitMove = neighborBitMoves[direction];
+
+         auto newLatBits = llBits.first + bitMove.first;
+         auto newLonBits = llBits.second + bitMove.second;
+
+         // Overflow in lat means polar, so put Nothing
+         if (newLatBits >> llSteps.first != 0) {
+             result[direction] = Nothing();
+         } else {
+             result[direction] = TGeoHashDescriptor(0, Steps);
+             // Overflow in lon means 180-meridian, so just remove overflowed bits (64-bit mask: `(1 << 31) - 1` would overflow int)
+             newLonBits &= ((1ull << llSteps.second) - 1);
+             result[direction]->SetLatLonBits(newLatBits, newLonBits);
+         }
+     }
+
+     return result;
+ }
+
+ TVector<TGeoHashDescriptor> TGeoHashDescriptor::GetChildren(ui8 steps = StepsPerPrecisionUnit) const {
+     const ui32 childrenSteps = static_cast<ui32>(steps) + Steps; // widened so the sum cannot wrap past 255 and slip under the limit check
+     if (childrenSteps > maxSteps) { // validate before any shift or allocation depends on `steps`
+         ythrow yexception() << "Resulting geohash steps are too big, available values: 0.." << ::ToString(maxSteps);
+     }
+     const ui64 childrenCount = 1ull << steps; // 64-bit shift: `steps` may exceed the width of int once validated against maxSteps
+     TVector<TGeoHashDescriptor> children(Reserve(childrenCount));
+     for (auto residue : xrange(childrenCount)) {
+         children.emplace_back((Bits << steps) | residue, static_cast<ui8>(childrenSteps)); // parent bits as prefix, residue enumerates the added low bits
+     }
+     return children;
+ }
+
+ /* Functions */
+
+ ui64 Encode(double latitude, double longitude, ui8 precision) {
+ auto descr = TGeoHashDescriptor(
+ latitude, longitude, TGeoHashDescriptor::PrecisionToSteps(precision));
+ return descr.GetBits();
+ }
+ ui64 Encode(const NGeo::TPointLL& point, ui8 precision) {
+ return TGeoHashDescriptor(
+ point, TGeoHashDescriptor::PrecisionToSteps(precision))
+ .GetBits();
+ }
+
+ TString EncodeToString(double latitude, double longitude, ui8 precision) {
+ return TGeoHashDescriptor(
+ latitude, longitude, TGeoHashDescriptor::PrecisionToSteps(precision))
+ .ToString();
+ }
+ TString EncodeToString(const NGeo::TPointLL& point, ui8 precision) {
+ return TGeoHashDescriptor(
+ point, TGeoHashDescriptor::PrecisionToSteps(precision))
+ .ToString();
+ }
+
+ NGeo::TPointLL DecodeToPoint(const TString& hashString) {
+ return TGeoHashDescriptor(hashString).ToPoint();
+ }
+ NGeo::TPointLL DecodeToPoint(ui64 hash, ui8 precision) {
+ return TGeoHashDescriptor(hash, TGeoHashDescriptor::PrecisionToSteps(precision)).ToPoint();
+ }
+
+ TBoundingBoxLL DecodeToBoundingBox(const TString& hashString) {
+ return TGeoHashDescriptor(hashString).ToBoundingBox();
+ }
+
+ TBoundingBoxLL DecodeToBoundingBox(ui64 hash, ui8 precision) {
+ return TGeoHashDescriptor(hash, TGeoHashDescriptor::PrecisionToSteps(precision)).ToBoundingBox();
+ }
+
+ TMaybe<ui64> GetNeighbour(ui64 hash, EDirection direction, ui8 precision) {
+ auto neighbour = TGeoHashDescriptor(
+ hash, TGeoHashDescriptor::PrecisionToSteps(precision))
+ .GetNeighbour(direction);
+
+ if (neighbour.Defined()) {
+ return neighbour->GetBits();
+ } else {
+ return Nothing();
+ }
+ }
+
+ TMaybe<TString> GetNeighbour(const TString& hashString, EDirection direction) {
+ auto neighbour = TGeoHashDescriptor(hashString).GetNeighbour(direction);
+ if (neighbour.Defined()) {
+ return neighbour->ToString();
+ } else {
+ return Nothing();
+ }
+ }
+
+ TGeoHashBitsNeighbours GetNeighbours(ui64 hash, ui8 precision) {
+ TGeoHashBitsNeighbours result;
+
+ auto neighbours = TGeoHashDescriptor(
+ hash, TGeoHashDescriptor::PrecisionToSteps(precision))
+ .GetNeighbours();
+
+ for (auto direction : directions) {
+ if (neighbours[direction].Defined()) {
+ result[direction] = neighbours[direction]->GetBits();
+ } else {
+ result[direction] = Nothing();
+ }
+ }
+
+ return result;
+ }
+
+ TGeoHashStringNeighbours GetNeighbours(const TString& hashString) {
+ TGeoHashStringNeighbours result;
+
+ auto neighbours = TGeoHashDescriptor(
+ hashString)
+ .GetNeighbours();
+
+ for (auto direction : directions) {
+ if (neighbours[direction].Defined()) {
+ result[direction] = neighbours[direction]->ToString();
+ } else {
+ result[direction] = Nothing();
+ }
+ }
+ return result;
+ }
+
+ TVector<TString> GetChildren(const TString& hashString) {
+ TVector<TString> result(Reserve(base32EncodeTable.size()));
+
+ for (auto ch : base32EncodeTable) {
+ result.push_back(hashString + ch);
+ }
+ return result;
+ }
+}
diff --git a/library/cpp/geohash/geohash.h b/library/cpp/geohash/geohash.h
new file mode 100644
index 0000000000..7d270612e8
--- /dev/null
+++ b/library/cpp/geohash/geohash.h
@@ -0,0 +1,123 @@
+#pragma once
+
+/**
+ * @file
+ * @brief Strong (because it works) and independent (of contrib/libs/geohash) GeoHash implementation
+ * GeoHash algo: https://en.wikipedia.org/wiki/Geohash
+ * Useful links:
+ * 1. http://geohash.org - Main Site
+ * 2. https://dou.ua/lenta/articles/geohash - Geohash-based geopoints clusterization
+ * 3. http://www.movable-type.co.uk/scripts/geohash.html - bidirectional encoding and visualization
+ */
+#include <library/cpp/geohash/direction.h>
+#include <library/cpp/geohash/direction.h_serialized.h>
+
+#include <library/cpp/geo/geo.h>
+
+#include <util/generic/maybe.h>
+#include <util/generic/string.h>
+#include <util/system/types.h>
+
+#include <array>
+
+namespace NGeoHash {
+ using TBoundingBoxLL = NGeo::TGeoBoundingBox;
+ static constexpr auto directionsCount = GetEnumItemsCount<EDirection>();
+
+    /**
+     * Fixed-size container with one slot per EDirection value.
+     * Behaves like std::array<T, directionsCount>, but is indexed by EDirection directly.
+     */
+    template <class T>
+    class TNeighbours: public std::array<T, directionsCount> {
+        using TBase = std::array<T, directionsCount>;
+
+    public:
+        TNeighbours() = default;
+
+        /**
+         * Fills the slots from the list, in order.
+         * The Y_ASSERT documents that exactly directionsCount values are expected; the copy is
+         * additionally clamped so that a longer list can never write past the end of the array,
+         * whether or not the assertion is compiled in.
+         */
+        TNeighbours(std::initializer_list<T> list) {
+            Y_ASSERT(list.size() == directionsCount);
+            const size_t count = std::min<size_t>(list.size(), directionsCount);
+            std::copy(list.begin(), list.begin() + count, TBase::begin());
+        }
+
+        // Read-only access to the slot that belongs to the given direction.
+        const T& operator[](EDirection direction) const {
+            return TBase::operator[](static_cast<size_t>(direction));
+        }
+
+        // Mutable access to the slot that belongs to the given direction.
+        T& operator[](EDirection direction) {
+            return TBase::operator[](static_cast<size_t>(direction));
+        }
+    };
+
+ // Geohash value stored as a bit pattern (Bits) together with a step count (Steps).
+ // NOTE(review): Steps presumably is the number of significant bits in Bits - confirm in geohash.cpp.
+ class TGeoHashDescriptor {
+ public:
+ // Empty descriptor: zero bits, zero steps.
+ TGeoHashDescriptor() noexcept
+ : Bits(0)
+ , Steps(0)
+ {
+ }
+
+ // Wraps an already encoded hash; the arguments are stored as given.
+ TGeoHashDescriptor(ui64 bits, ui8 steps) noexcept
+ : Bits(bits)
+ , Steps(steps)
+ {
+ }
+
+ // Encoding constructors (defined in geohash.cpp). The overloads taking `limits`
+ // receive an explicit bounding box for the encoding.
+ TGeoHashDescriptor(double latitude, double longitude, ui8 steps);
+ TGeoHashDescriptor(double latitude, double longitude, const TBoundingBoxLL& limits, ui8 steps);
+ TGeoHashDescriptor(const NGeo::TPointLL& point, ui8 steps);
+ TGeoHashDescriptor(const NGeo::TPointLL& point, const TBoundingBoxLL& limits, ui8 steps);
+
+ // Builds the descriptor from the textual hash representation.
+ explicit TGeoHashDescriptor(const TString& hashString);
+
+ ui64 GetBits() const;
+ ui8 GetSteps() const;
+
+ TString ToString() const;
+
+ // Decoding to a point, with or without an explicit bounding box.
+ NGeo::TPointLL ToPoint(const TBoundingBoxLL& limits) const;
+ NGeo::TPointLL ToPoint() const;
+
+ // Decoding to a bounding box, with or without an explicit bounding box.
+ TBoundingBoxLL ToBoundingBox(const TBoundingBoxLL& limits) const;
+ TBoundingBoxLL ToBoundingBox() const;
+
+ // A neighbour may be absent, hence the TMaybe results.
+ TMaybe<TGeoHashDescriptor> GetNeighbour(EDirection direction) const;
+ TNeighbours<TMaybe<TGeoHashDescriptor>> GetNeighbours() const;
+
+ TVector<TGeoHashDescriptor> GetChildren(ui8 steps) const;
+
+ // Conversions between step counts and precision.
+ // NOTE(review): presumably based on StepsPerPrecisionUnit below - confirm in geohash.cpp.
+ static ui8 StepsToPrecision(ui8 steps);
+ static ui8 PrecisionToSteps(ui8 precision);
+
+ private:
+ void InitFromLatLon(double latitude, double longitude, const TBoundingBoxLL& limits, ui8 steps);
+ std::pair<ui8, ui8> LatLonSteps() const;
+ std::pair<ui32, ui32> LatLonBits() const;
+ void SetLatLonBits(ui32 latBits, ui32 lonBits);
+ static ui64 Interleave64(ui32 x, ui32 y);
+ static std::pair<ui32, ui32> Deinterleave64(ui64 interleaved);
+
+ private:
+ // Steps per unit of precision.
+ static const ui8 StepsPerPrecisionUnit = 5;
+ ui64 Bits;
+ ui8 Steps;
+ };
+
+ // Encoding of a coordinate pair or point into an integer hash with the given precision.
+ ui64 Encode(double latitude, double longitude, ui8 precision);
+ ui64 Encode(const NGeo::TPointLL& point, ui8 precision);
+
+ // Same as Encode, but returns the string form of the hash.
+ TString EncodeToString(double latitude, double longitude, ui8 precision);
+ TString EncodeToString(const NGeo::TPointLL& point, ui8 precision);
+
+ // Decoding of a string or integer hash into a point.
+ NGeo::TPointLL DecodeToPoint(const TString& hashString);
+ NGeo::TPointLL DecodeToPoint(ui64 hash, ui8 precision);
+
+ // Decoding of a string or integer hash into a bounding box.
+ TBoundingBoxLL DecodeToBoundingBox(const TString& hashString);
+ TBoundingBoxLL DecodeToBoundingBox(ui64 hash, ui8 precision);
+
+ // Neighbour in one direction; the result is empty when the descriptor reports no neighbour there.
+ TMaybe<ui64> GetNeighbour(ui64 hash, EDirection direction, ui8 precision);
+ TMaybe<TString> GetNeighbour(const TString& hashString, EDirection direction);
+
+ // Per-direction neighbour containers for integer and string hashes.
+ using TGeoHashBitsNeighbours = TNeighbours<TMaybe<ui64>>;
+ using TGeoHashStringNeighbours = TNeighbours<TMaybe<TString>>;
+
+ // Neighbours in every direction; each slot may be empty.
+ TGeoHashBitsNeighbours GetNeighbours(ui64 hash, ui8 precision);
+ TGeoHashStringNeighbours GetNeighbours(const TString& hashString);
+
+ // Child hashes of the given string hash.
+ TVector<TString> GetChildren(const TString& hashString);
+
+} /* namespace NGeoHash */
diff --git a/library/cpp/geohash/ya.make b/library/cpp/geohash/ya.make
new file mode 100644
index 0000000000..3350ca1cc6
--- /dev/null
+++ b/library/cpp/geohash/ya.make
@@ -0,0 +1,13 @@
+# Build description of the geohash library.
+LIBRARY()
+
+# geohash.h includes <library/cpp/geo/geo.h>.
+PEERDIR(
+ library/cpp/geo
+)
+
+SRCS(
+ geohash.cpp
+)
+
+# Enum serialization for direction.h; geohash.h includes direction.h_serialized.h.
+GENERATE_ENUM_SERIALIZATION_WITH_HEADER(direction.h)
+
+END()
diff --git a/library/cpp/ipreg/CMakeLists.darwin-x86_64.txt b/library/cpp/ipreg/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..05b000b7da
--- /dev/null
+++ b/library/cpp/ipreg/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,53 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Locate the enum_parser tool once; both generate_enum_serilization() calls below are for
+# this target, and the second identical get_built_tool_path() call was redundant.
+get_built_tool_path(
+  TOOL_enum_parser_bin
+  TOOL_enum_parser_dependency
+  tools/enum_parser/enum_parser
+  enum_parser
+)
+
+add_library(library-cpp-ipreg)
+target_link_libraries(library-cpp-ipreg PUBLIC
+  contrib-libs-cxxsupp
+  yutil
+  cpp-getopt-small
+  library-cpp-json
+  library-cpp-geobase
+  library-cpp-int128
+  tools-enum_parser-enum_serialization_runtime
+)
+target_sources(library-cpp-ipreg PRIVATE
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/address.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/checker.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/merge.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/range.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/reader.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/sources.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/split.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/stopwatch.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/writer.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/util_helpers.cpp
+)
+# Enum serialization for the enums declared in address.h and sources.h.
+generate_enum_serilization(library-cpp-ipreg
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/address.h
+  INCLUDE_HEADERS
+  library/cpp/ipreg/address.h
+)
+generate_enum_serilization(library-cpp-ipreg
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/sources.h
+  INCLUDE_HEADERS
+  library/cpp/ipreg/sources.h
+)
diff --git a/library/cpp/ipreg/CMakeLists.linux-aarch64.txt b/library/cpp/ipreg/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..5e76739840
--- /dev/null
+++ b/library/cpp/ipreg/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,54 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Locate the enum_parser tool once; both generate_enum_serilization() calls below are for
+# this target, and the second identical get_built_tool_path() call was redundant.
+get_built_tool_path(
+  TOOL_enum_parser_bin
+  TOOL_enum_parser_dependency
+  tools/enum_parser/enum_parser
+  enum_parser
+)
+
+add_library(library-cpp-ipreg)
+target_link_libraries(library-cpp-ipreg PUBLIC
+  contrib-libs-linux-headers
+  contrib-libs-cxxsupp
+  yutil
+  cpp-getopt-small
+  library-cpp-json
+  library-cpp-geobase
+  library-cpp-int128
+  tools-enum_parser-enum_serialization_runtime
+)
+target_sources(library-cpp-ipreg PRIVATE
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/address.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/checker.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/merge.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/range.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/reader.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/sources.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/split.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/stopwatch.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/writer.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/util_helpers.cpp
+)
+# Enum serialization for the enums declared in address.h and sources.h.
+generate_enum_serilization(library-cpp-ipreg
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/address.h
+  INCLUDE_HEADERS
+  library/cpp/ipreg/address.h
+)
+generate_enum_serilization(library-cpp-ipreg
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/sources.h
+  INCLUDE_HEADERS
+  library/cpp/ipreg/sources.h
+)
diff --git a/library/cpp/ipreg/CMakeLists.linux-x86_64.txt b/library/cpp/ipreg/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..5e76739840
--- /dev/null
+++ b/library/cpp/ipreg/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,54 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Locate the enum_parser tool once; both generate_enum_serilization() calls below are for
+# this target, and the second identical get_built_tool_path() call was redundant.
+get_built_tool_path(
+  TOOL_enum_parser_bin
+  TOOL_enum_parser_dependency
+  tools/enum_parser/enum_parser
+  enum_parser
+)
+
+add_library(library-cpp-ipreg)
+target_link_libraries(library-cpp-ipreg PUBLIC
+  contrib-libs-linux-headers
+  contrib-libs-cxxsupp
+  yutil
+  cpp-getopt-small
+  library-cpp-json
+  library-cpp-geobase
+  library-cpp-int128
+  tools-enum_parser-enum_serialization_runtime
+)
+target_sources(library-cpp-ipreg PRIVATE
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/address.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/checker.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/merge.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/range.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/reader.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/sources.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/split.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/stopwatch.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/writer.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/util_helpers.cpp
+)
+# Enum serialization for the enums declared in address.h and sources.h.
+generate_enum_serilization(library-cpp-ipreg
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/address.h
+  INCLUDE_HEADERS
+  library/cpp/ipreg/address.h
+)
+generate_enum_serilization(library-cpp-ipreg
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/sources.h
+  INCLUDE_HEADERS
+  library/cpp/ipreg/sources.h
+)
diff --git a/library/cpp/ipreg/CMakeLists.txt b/library/cpp/ipreg/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/library/cpp/ipreg/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Select the platform-specific listing for this directory.
+# There is no else() branch: on any other platform nothing is included and
+# this file does not define the library-cpp-ipreg target.
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/library/cpp/ipreg/CMakeLists.windows-x86_64.txt b/library/cpp/ipreg/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..05b000b7da
--- /dev/null
+++ b/library/cpp/ipreg/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,53 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Locate the enum_parser tool once; both generate_enum_serilization() calls below are for
+# this target, and the second identical get_built_tool_path() call was redundant.
+get_built_tool_path(
+  TOOL_enum_parser_bin
+  TOOL_enum_parser_dependency
+  tools/enum_parser/enum_parser
+  enum_parser
+)
+
+add_library(library-cpp-ipreg)
+target_link_libraries(library-cpp-ipreg PUBLIC
+  contrib-libs-cxxsupp
+  yutil
+  cpp-getopt-small
+  library-cpp-json
+  library-cpp-geobase
+  library-cpp-int128
+  tools-enum_parser-enum_serialization_runtime
+)
+target_sources(library-cpp-ipreg PRIVATE
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/address.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/checker.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/merge.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/range.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/reader.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/sources.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/split.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/stopwatch.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/writer.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/util_helpers.cpp
+)
+# Enum serialization for the enums declared in address.h and sources.h.
+generate_enum_serilization(library-cpp-ipreg
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/address.h
+  INCLUDE_HEADERS
+  library/cpp/ipreg/address.h
+)
+generate_enum_serilization(library-cpp-ipreg
+  ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/sources.h
+  INCLUDE_HEADERS
+  library/cpp/ipreg/sources.h
+)
diff --git a/library/cpp/ipreg/address.cpp b/library/cpp/ipreg/address.cpp
new file mode 100644
index 0000000000..83880ccbae
--- /dev/null
+++ b/library/cpp/ipreg/address.cpp
@@ -0,0 +1,365 @@
+#include "address.h"
+
+#include <util/generic/mem_copy.h>
+#include <util/stream/format.h>
+#include <util/string/cast.h>
+#include <util/string/hex.h>
+#include <util/string/printf.h>
+#include <util/string/split.h>
+#include <util/string/type.h>
+#include <util/string/vector.h>
+#include <util/system/byteorder.h>
+#include <util/network/socket.h>
+
+#include <sstream>
+
+namespace NIPREG {
+
+// Picks a parser by the shape of the text: ':' selects IPv6, otherwise '.' selects dotted IPv4,
+// otherwise a plain number is read as a numeric IPv4 address. Anything else is rejected.
+TAddress TAddress::ParseAny(TStringBuf str) {
+    const bool hasColon = str.find(':') != TStringBuf::npos;
+    if (hasColon) {
+        return ParseIPv6(str);
+    }
+
+    const bool hasDot = str.find('.') != TStringBuf::npos;
+    if (hasDot) {
+        return ParseIPv4(str);
+    }
+
+    if (IsNumber(str)) {
+        return ParseIPv4Num(str); // TODO(dieash@) IPv6Num
+    }
+
+    ythrow yexception() << "Unrecognized IPREG address format: " << str;
+}
+
+// Parses textual IPv6 with inet_pton straight into Data; throws when inet_pton does not return 1.
+TAddress TAddress::ParseIPv6(TStringBuf str) {
+    // inet_pton takes a C string, so make an owning copy of the buffer first
+    const TString text(str);
+
+    TAddress parsed;
+    const int rc = inet_pton(AF_INET6, text.c_str(), &parsed.Data);
+    if (rc != 1) {
+        ythrow yexception() << "Failed to parse IPREG address " << str << " as IPv6";
+    }
+
+    return parsed;
+}
+
+// Parses textual IPv4 with inet_aton and stores it via FromIPv4Num; throws when inet_aton does not return 1.
+TAddress TAddress::ParseIPv4(TStringBuf str) {
+    // inet_aton takes a C string, so make an owning copy of the buffer first
+    const TString text(str);
+
+    struct in_addr ipv4;
+    const int rc = inet_aton(text.c_str(), &ipv4);
+    if (rc != 1) {
+        ythrow yexception() << "Failed to parse IPREG address " << str << " as IPv4";
+    }
+
+    // s_addr is in network order, FromIPv4Num expects a host-order number
+    const auto hostOrder = InetToHost(ipv4.s_addr);
+    return FromIPv4Num(hostOrder);
+}
+
+// Reads the text as a 32-bit unsigned number and converts it with FromIPv4Num.
+TAddress TAddress::ParseIPv4Num(TStringBuf str) {
+    const ui32 number = FromString<ui32>(str);
+    return FromIPv4Num(number);
+}
+
+// Reads the text as a 128-bit unsigned number and converts it with FromUint128.
+TAddress TAddress::ParseIPv6Num(TStringBuf str) {
+    const ui128 number = FromString<ui128>(str);
+    return FromUint128(number);
+}
+
+// Copies sizeof(Data) raw bytes from data; the caller must provide at least that many.
+TAddress TAddress::FromBinary(unsigned char const * const data) {
+    TAddress result;
+    for (size_t octet = 0; octet < sizeof(result.Data); ++octet) {
+        result.Data[octet] = data[octet];
+    }
+    return result;
+}
+
+// Combines four bytes (most significant first) into a number and converts it with FromIPv4Num.
+TAddress TAddress::FromBinaryIPv4(unsigned char const * const data) {
+    ui32 number = 0;
+    for (size_t octet = 0; octet < 4; ++octet) {
+        number = (number << 8) | static_cast<ui32>(data[octet]);
+    }
+    return TAddress::FromIPv4Num(number);
+}
+
+// Stores a host-order IPv4 number in IPv4-mapped form:
+// bytes 0..9 are zero, bytes 10..11 are 0xff, bytes 12..15 hold the number, most significant byte first.
+TAddress TAddress::FromIPv4Num(ui32 num) {
+    TAddress mapped;
+
+    for (size_t octet = 0; octet < 10; ++octet) {
+        mapped.Data[octet] = 0x00;
+    }
+    mapped.Data[10] = 0xff;
+    mapped.Data[11] = 0xff;
+
+    for (size_t octet = 0; octet < 4; ++octet) {
+        const unsigned shift = 24 - 8 * octet;
+        mapped.Data[12 + octet] = (num >> shift) & 0xff;
+    }
+
+    return mapped;
+}
+
+// Writes the two 64-bit halves of the number into Data in network byte order, high half first.
+TAddress TAddress::FromUint128(ui128 intAddr) {
+    const auto hiBE = HostToInet(GetHigh(intAddr));
+    const auto loBE = HostToInet(GetLow(intAddr));
+
+    TAddress addr;
+    MemCopy<unsigned char>(addr.Data, reinterpret_cast<const unsigned char*>(&hiBE), sizeof(hiBE));
+    MemCopy<unsigned char>(addr.Data + sizeof(hiBE), reinterpret_cast<const unsigned char*>(&loBE), sizeof(loBE));
+
+    return addr;
+}
+
+namespace {
+    // Overwrites the low 8 bytes of the address (Data[8]..Data[15]) with the given byte.
+    // The parameter is unsigned char, like the elements of Data, so that a value such as
+    // 0xff is passed through without an out-of-range conversion to (possibly signed) char.
+    void SetHostsBits(TAddress& addr, unsigned char value) {
+        for (size_t octet = 8; octet < sizeof(addr.Data); ++octet) {
+            addr.Data[octet] = value;
+        }
+    }
+} // anon-ns
+
+// Returns a copy of base with the low 8 bytes (Data[8..15]) set to 0xff.
+TAddress TAddress::MakeNet64Broadcast(TAddress base) {
+ SetHostsBits(base, 0xff);
+ return base;
+}
+
+// Returns a copy of base with the low 8 bytes (Data[8..15]) set to 0x00.
+TAddress TAddress::MakeNet64Prefix(TAddress base) {
+ SetHostsBits(base, 0x00);
+ return base;
+}
+
+// The all-zero address, created once on first use.
+const TAddress& TAddress::Lowest() {
+    static const TAddress lowest{};
+    return lowest;
+}
+
+// The address with every byte equal to 0xff, created once on first use.
+const TAddress& TAddress::Highest() {
+    static const TAddress highest = [] {
+        TAddress allOnes;
+        for (unsigned char& octet : allOnes.Data) {
+            octet = 0xff;
+        }
+        return allOnes;
+    }();
+    return highest;
+}
+
+// Joins ToString() of bytes 12..15 with dots.
+TString TAddress::AsIPv4() const {
+    TString dotted = ToString(Data[12]);
+    for (size_t octet = 13; octet < sizeof(Data); ++octet) {
+        dotted += ".";
+        dotted += ToString(Data[octet]);
+    }
+    return dotted;
+}
+
+// Packs bytes 12..15 (most significant first) into a 32-bit number and prints it.
+TString TAddress::AsIPv4Num() const {
+    ui32 packed = 0;
+    for (size_t octet = 12; octet < sizeof(Data); ++octet) {
+        packed = (packed << 8) | static_cast<ui32>(Data[octet]);
+    }
+    return ToString(packed);
+}
+
+// Full (non-abbreviated) lower-case hex form: every byte printed with Hex(..., HF_FULL),
+// with ':' between each pair of bytes.
+TString TAddress::AsIPv6() const {
+    TStringStream out;
+
+    for (size_t octet = 0; octet < sizeof(Data); ++octet) {
+        // a separator goes in front of every even byte except the very first one
+        if (octet != 0 && octet % 2 == 0) {
+            out << ':';
+        }
+        out << Hex(Data[octet], HF_FULL);
+    }
+
+    TString text = out.Str();
+    text.to_lower();
+
+    return text;
+}
+
+// Prints the whole address as one 128-bit number.
+TString TAddress::AsIPv6Num() const {
+    const ui128 number = AsUint128();
+    return ToString(number);
+}
+
+// Text form produced by inet_ntop(AF_INET6, ...); throws when inet_ntop reports failure.
+TString TAddress::GetTextFromNetOrder() const {
+    char text[INET6_ADDRSTRLEN];
+    const bool converted = inet_ntop(AF_INET6, (void*)(&Data), text, sizeof(text)) != NULL;
+    if (!converted) {
+        ythrow yexception() << "Failed to stringify IPREG address";
+    }
+
+    return text;
+}
+
+namespace {
+    // Hex encoding of the in-memory bytes of v.
+    TString GetHexStr(ui64 v) {
+        const char* bytes = reinterpret_cast<const char*>(&v);
+        return HexEncode(bytes, sizeof(v));
+    }
+
+    // Appends one line with the value as given and after InetToHost(), each as a number and as hex.
+    void HexDumpToStream(std::stringstream& ss, ui64 beData) {
+        const auto hostData = InetToHost(beData);
+
+        ss << "\t/big-end[" << beData << " / " << GetHexStr(beData) << "]";
+        ss << "\t/host[" << hostData << " / " << GetHexStr(hostData) << "]\n";
+    }
+} // anon-ns
+
+// Hex encoding of the 16 address bytes; with deepView, a dump of both 64-bit halves is appended.
+TString TAddress::GetHexString(const bool deepView) const {
+    std::stringstream dump;
+    dump << HexEncode(TStringBuf(reinterpret_cast<const char*>(Data), sizeof(Data)));
+
+    if (!deepView) {
+        return dump.str().c_str();
+    }
+
+    dump << "\nhigh-data";
+    HexDumpToStream(dump, GetHigh64());
+
+    dump << "\nlow-data";
+    HexDumpToStream(dump, GetLow64());
+
+    return dump.str().c_str();
+}
+
+// Dotted IPv4 for IPv4-mapped addresses, inet_ntop text otherwise.
+TString TAddress::AsShortIP() const {
+    return IsIPv4() ? AsIPv4() : GetTextFromNetOrder();
+}
+
+// IPv4-mapped addresses are printed as "::ffff:" followed by two hex groups built from bytes 12..15;
+// everything else uses the inet_ntop text.
+TString TAddress::AsShortIPv6() const {
+    if (!IsIPv4()) {
+        return GetTextFromNetOrder();
+    }
+
+    const ui32 upperGroup = (ui32)Data[12] << 8 | (ui32)Data[13];
+    const ui32 lowerGroup = (ui32)Data[14] << 8 | (ui32)Data[15];
+    return Sprintf("::ffff:%x:%x", upperGroup, lowerGroup);
+}
+
+// Dotted IPv4 for IPv4-mapped addresses, full hex IPv6 form otherwise.
+TString TAddress::AsLongIP() const {
+    return IsIPv4() ? AsIPv4() : AsIPv6();
+}
+
+// The address as a 128-bit number: both 64-bit halves converted with InetToHost, high half first.
+ui128 TAddress::AsUint128() const {
+    const ui64 high = GetHigh64LE();
+    const ui64 low = GetLow64LE();
+    return ui128(high, low);
+}
+
+// Raw (not byte-swapped) 64-bit value stored in bytes 0..7 of Data.
+// The bytes are copied with memcpy rather than read through a ui64* cast: Data is an
+// unsigned char array with no alignment guarantee for ui64, and reading it through a
+// ui64 lvalue violates the aliasing rules.
+ui64 TAddress::GetHigh64() const {
+    ui64 high = 0;
+    memcpy(&high, Data, sizeof(high));
+    return high;
+}
+
+// Raw (not byte-swapped) 64-bit value stored in bytes 8..15 of Data.
+// The bytes are copied with memcpy rather than read through a ui64* cast: Data is an
+// unsigned char array with no alignment guarantee for ui64, and reading it through a
+// ui64 lvalue violates the aliasing rules.
+ui64 TAddress::GetLow64() const {
+    ui64 low = 0;
+    memcpy(&low, Data + sizeof(low), sizeof(low));
+    return low;
+}
+
+// High half of the address after InetToHost(), i.e. in host byte order.
+ui64 TAddress::GetHigh64LE() const {
+    const ui64 networkOrder = GetHigh64();
+    return InetToHost(networkOrder);
+}
+
+// Low half of the address after InetToHost(), i.e. in host byte order.
+ui64 TAddress::GetLow64LE() const {
+    const ui64 networkOrder = GetLow64();
+    return InetToHost(networkOrder);
+}
+
+// True when the low 64 bits equal those of "::ffff:ffff:ffff:ffff".
+bool TAddress::IsNet64Broadcast() const {
+    // parsed once on first call
+    static const auto NET64_HOSTS_MASK = TAddress::ParseAny("::ffff:ffff:ffff:ffff").GetLow64();
+    return GetLow64() == NET64_HOSTS_MASK;
+}
+
+// True when the low 64 bits are neither all zero nor the /64 broadcast pattern.
+bool TAddress::IsNet64Host() const {
+    if (GetLow64() == 0) {
+        return false;
+    }
+    return !IsNet64Broadcast();
+}
+
+// Renders the address in the requested textual format.
+// Throws yexception for a value outside EAddressFormat (for example one produced by a cast);
+// previously control would fall off the end of the function in that case.
+TString TAddress::Format(EAddressFormat format) const {
+    switch (format) {
+        case EAddressFormat::IPV6:
+            return AsIPv6();
+        case EAddressFormat::LONG_IP:
+            return AsLongIP();
+        case EAddressFormat::SHORT_IP:
+            return AsShortIP();
+        case EAddressFormat::NUMERIC_IPV4:
+            return AsIPv4Num();
+        case EAddressFormat::NUMERIC_IPV6:
+            return AsIPv6Num();
+        case EAddressFormat::NTOA:
+            return GetTextFromNetOrder();
+        case EAddressFormat::SHORT_IPV6:
+            return AsShortIPv6();
+    }
+
+    // No default label above, so the compiler can still warn about unhandled enumerators.
+    ythrow yexception() << "Unsupported IPREG address format: " << static_cast<int>(format);
+}
+
+// True for IPv4-mapped addresses: bytes 0..9 are zero and bytes 10..11 are 0xff.
+bool TAddress::IsIPv4() const {
+    for (size_t octet = 0; octet < 10; ++octet) {
+        if (Data[octet] != 0) {
+            return false;
+        }
+    }
+    return Data[10] == 0xff && Data[11] == 0xff;
+}
+
+// The address following this one; Highest() is returned unchanged.
+TAddress TAddress::Next() const {
+    if (*this == Highest()) {
+        return Highest();
+    }
+
+    TAddress next = *this;
+    for (ssize_t octet = 15; octet >= 0; octet--) {
+        // increment the byte; only a wrap to zero carries into the next more significant byte
+        if (++next.Data[octet] != 0) {
+            break;
+        }
+    }
+
+    return next;
+}
+
+// The address preceding this one; Lowest() is returned unchanged.
+TAddress TAddress::Prev() const {
+    if (*this == Lowest()) {
+        return Lowest();
+    }
+
+    TAddress prev = *this;
+    for (ssize_t octet = 15; octet >= 0; octet--) {
+        // decrement the byte; only a byte that was zero borrows from the next more significant byte
+        const bool borrow = prev.Data[octet] == 0;
+        --prev.Data[octet];
+        if (!borrow) {
+            break;
+        }
+    }
+
+    return prev;
+}
+
+// Signed difference of the two addresses as a double, accumulated byte by byte
+// from the most significant byte (base 256).
+double TAddress::operator-(const TAddress& rhs) const {
+    double accumulated = 0.0;
+    for (ssize_t octet = 0; octet < 16; octet++) {
+        const int byteDelta = static_cast<int>(Data[octet]) - static_cast<int>(rhs.Data[octet]);
+        accumulated = accumulated * 256.0 + byteDelta;
+    }
+    return accumulated;
+}
+
+// Absolute difference between the two addresses as a 128-bit number.
+ui128 TAddress::Distance(const TAddress& a, const TAddress& b) {
+    const ui128 numA = a.AsUint128();
+    const ui128 numB = b.AsUint128();
+    if (a > b) {
+        return numA - numB;
+    }
+    return numB - numA;
+}
+
+namespace {
+    constexpr size_t MAX_IPV6_MASK_LEN = 16 * 8;
+    constexpr size_t MAX_IPV4_MASK_LEN = 4 * 8;
+    constexpr size_t IPV4_IN6_MASK_BASE = MAX_IPV6_MASK_LEN - MAX_IPV4_MASK_LEN;
+
+    // Returns addr with every bit below the mask length set to 1.
+    // For an IPv4-mapped address a mask length of 1..32 is first shifted by 96 bits.
+    // Throws yexception when the resulting length is 0, exceeds 128, or is below 96
+    // for an IPv4-mapped address.
+    TAddress SetMaskBits(const TAddress& addr, const size_t wantedMaskLen) {
+        auto maskLen = wantedMaskLen;
+        if (addr.IsIPv4() && maskLen && maskLen <= MAX_IPV4_MASK_LEN) {
+            maskLen += IPV4_IN6_MASK_BASE;
+        }
+
+        if (maskLen == 0 || maskLen > MAX_IPV6_MASK_LEN || (addr.IsIPv4() && maskLen < IPV4_IN6_MASK_BASE)) {
+            ythrow yexception() << "strange mask (calc/wanted) " << maskLen << "/" << wantedMaskLen << "; " << addr;
+        }
+
+        // the host part splits into whole trailing octets plus some low bits of the octet before them
+        const int octetsForUpdate = (MAX_IPV6_MASK_LEN - maskLen) / 8;
+        const int bitsForUpdate = (MAX_IPV6_MASK_LEN - maskLen) % 8;
+
+        size_t currOctet = 15;
+        TAddress addrWithMask = addr;
+
+        for (int octetNum = 0; octetNum != octetsForUpdate; ++octetNum) {
+            addrWithMask.Data[currOctet--] = 0xff;
+        }
+
+        // Set (not toggle) the remaining host bits: with XOR a host bit that is already 1 in
+        // addr would be cleared, which disagrees with the 0xff fill used for whole octets above.
+        for (int bitNum = 0; bitNum != bitsForUpdate; ++bitNum) {
+            addrWithMask.Data[currOctet] |= static_cast<unsigned char>(1u << bitNum);
+        }
+
+        return addrWithMask;
+    }
+} // anon-ns
+
+// Splits the text on '/' (empty pieces are skipped) and delegates to the vector constructor.
+TNetwork::TNetwork(const TString& str)
+ : TNetwork(static_cast<TVector<TString>>(StringSplitter(str).Split('/').SkipEmpty()))
+{}
+
+// data[0] is the address text ("" when missing); data[1], when present, is converted to the
+// mask length with FromStringWithDefault<size_t>. A missing mask is passed on as 0.
+TNetwork::TNetwork(const TVector<TString>& data)
+ : TNetwork(data.size() ? data[0] : "",
+ data.size() > 1 ? FromStringWithDefault<size_t>(data[1]) : 0)
+{}
+
+// begin is the parsed address; end is begin with the bits below the mask set (see SetMaskBits).
+TNetwork::TNetwork(const TString& net, size_t maskLen)
+ : begin(TAddress::ParseAny(net))
+ , end(SetMaskBits(begin, maskLen))
+{}
+
+}
+
+// Streams the address in its AsShortIPv6() form.
+IOutputStream& operator<<(IOutputStream& output, const NIPREG::TAddress& addr) {
+    const TString text = addr.AsShortIPv6();
+    output << text;
+    return output;
+}
diff --git a/library/cpp/ipreg/address.h b/library/cpp/ipreg/address.h
new file mode 100644
index 0000000000..9071418d5b
--- /dev/null
+++ b/library/cpp/ipreg/address.h
@@ -0,0 +1,137 @@
+#pragma once
+
+#include <library/cpp/int128/int128.h>
+
+#include <util/generic/string.h>
+#include <util/digest/murmur.h>
+#include <util/string/cast.h>
+
+namespace NIPREG {
+
+// 128-bit address value. IPv4 addresses are kept in IPv4-mapped form (see FromIPv4Num / IsIPv4).
+struct TAddress {
+ // Output formats accepted by Format().
+ // NOTE(review): the strings in the trailing comments are presumably the names used by the
+ // generated enum serialization - confirm.
+ enum class EAddressFormat {
+ IPV6 = 0x00 /* "ipv6" */,
+ LONG_IP = 0x01 /* "long" */,
+ SHORT_IP = 0x02 /* "short" */,
+ NUMERIC_IPV4 = 0x03 /* "num4" */,
+ NTOA = 0x04 /* "n2a" */,
+ SHORT_IPV6 = 0x05 /* "short-ipv6" */,
+ NUMERIC_IPV6 = 0x06 /* "num" */,
+ };
+
+ unsigned char Data[16] = {0}; // NOTA BENE: network byte order (Big-Endian)
+
+ // Comparison
+ // All comparisons are byte-wise memcmp over Data, i.e. numeric order of the big-endian value.
+ bool operator==(const TAddress& other) const {
+ return memcmp(Data, other.Data, sizeof(Data)) == 0;
+ }
+
+ bool operator<(const TAddress& other) const {
+ return memcmp(Data, other.Data, sizeof(Data)) < 0;
+ }
+
+ bool operator>(const TAddress& other) const {
+ return memcmp(Data, other.Data, sizeof(Data)) > 0;
+ }
+
+ bool operator!=(const TAddress& other) const {
+ return !(*this == other);
+ }
+
+ bool operator<=(const TAddress& other) const {
+ return !(*this > other);
+ }
+
+ bool operator>=(const TAddress& other) const {
+ return !(*this < other);
+ }
+
+ // Difference of two addresses as a double.
+ double operator-(const TAddress& rhs) const;
+
+ // Parsing
+ static TAddress ParseAny(TStringBuf str);
+
+ static TAddress ParseIPv6(TStringBuf str);
+ static TAddress ParseIPv4(TStringBuf str);
+ static TAddress ParseIPv4Num(TStringBuf str);
+ static TAddress ParseIPv6Num(TStringBuf str);
+
+ static TAddress FromIPv4Num(ui32 num);
+ static TAddress FromUint128(ui128 addr);
+ static TAddress FromBinary(unsigned char const * data);
+ static TAddress FromBinaryIPv4(unsigned char const * const data);
+
+ static TAddress MakeNet64Broadcast(TAddress base);
+ static TAddress MakeNet64Prefix(TAddress base);
+
+ static const TAddress& Lowest();
+ static const TAddress& Highest();
+
+ // Inspecting
+ TString AsIPv4() const;
+ TString AsIPv4Num() const;
+ TString AsIPv6() const;
+ TString AsIPv6Num() const;
+ TString GetTextFromNetOrder() const;
+ TString GetHexString(bool deepView = false) const;
+
+ TString AsShortIP() const;
+ TString AsShortIPv6() const;
+ TString AsLongIP() const;
+
+ ui128 AsUint128() const;
+ ui64 GetHigh64() const;
+ ui64 GetLow64() const;
+ ui64 GetHigh64LE() const;
+ ui64 GetLow64LE() const;
+
+ bool IsNet64Broadcast() const;
+ bool IsNet64Host() const;
+
+ // Address made of this address' high 64 bits with the low 64 bits zeroed.
+ TAddress GetNet64() const {
+ return TAddress::FromUint128(ui128{GetHigh64LE()} << 64);
+ }
+
+ // Same as GetNet64(), for the high half decremented by one.
+ // The subtraction is done on ui64, so a high half of 0 wraps around.
+ TAddress GetPrevNet64() const {
+ return TAddress::FromUint128(ui128{GetHigh64LE() - 1} << 64);
+ }
+
+ // Same as GetNet64(), for the high half incremented by one.
+ // The addition is done on ui64, so an all-ones high half wraps around.
+ TAddress GetNextNet64() const {
+ return TAddress::FromUint128(ui128{GetHigh64LE() + 1} << 64);
+ }
+
+ TString Format(EAddressFormat format) const;
+
+ // 4 for IPv4-mapped addresses, 6 otherwise.
+ int GetType() const { return IsIPv4() ? 4 : 6; }
+ bool IsIPv4() const;
+
+ // Mutating
+ // Both methods are const: they return a new address and leave this one unchanged.
+ TAddress Next() const;
+ TAddress Prev() const;
+
+ // Absolute distance between two addresses.
+ static ui128 Distance(const TAddress& a, const TAddress& b);
+};
+
+using EAddressFormat = TAddress::EAddressFormat;
+
+// Address range described by a network in "address/mask-length" text form.
+struct TNetwork {
+ TAddress begin;
+ TAddress end;
+
+ // Not explicit: a TString converts to TNetwork implicitly.
+ TNetwork(const TString& str = "0.0.0.0/32");
+
+private:
+ // Delegation targets of the public constructor.
+ TNetwork(const TVector<TString>& data);
+ TNetwork(const TString& net, size_t mask);
+};
+
+} // NIPREG
+
+// Hash support for TAddress: MurmurHash over the 16 address bytes.
+template <>
+struct THash<NIPREG::TAddress> {
+    inline size_t operator()(const NIPREG::TAddress& address) const {
+        const void* bytes = (const void*)address.Data;
+        return MurmurHash<size_t>(bytes, sizeof(address.Data));
+    }
+};
+
+IOutputStream& operator<<(IOutputStream& output, const NIPREG::TAddress& addr);
diff --git a/library/cpp/ipreg/checker.cpp b/library/cpp/ipreg/checker.cpp
new file mode 100644
index 0000000000..9c41d27dc0
--- /dev/null
+++ b/library/cpp/ipreg/checker.cpp
@@ -0,0 +1,47 @@
+#include "checker.h"
+
+namespace NIPREG {
+
+// Same as CheckNext(), but a failed check raises yexception naming the offending range.
+void TChecker::CheckNextFatal(const TAddress& first, const TAddress& last) {
+    const bool accepted = CheckNext(first, last);
+    if (accepted) {
+        return;
+    }
+    ythrow yexception() << "IPREG format error: " << first.AsIPv6() << " - " << last.AsIPv6();
+}
+
+// Starts without a remembered previous range.
+TFlatChecker::TFlatChecker()
+    : HasState(false)
+{
+}
+
+// Accepts a range when first <= last and, if a previous range was seen, first lies strictly
+// after the previous last. The range is remembered whether or not it was accepted.
+bool TFlatChecker::CheckNext(const TAddress& first, const TAddress& last) {
+    const bool wellOrdered = !(first > last);
+    const bool afterPrevious = !HasState || first > PrevLast;
+
+    PrevLast = last;
+    HasState = true;
+
+    return wellOrdered && afterPrevious;
+}
+
+// Starts without a remembered previous range.
+TIntersectingChecker::TIntersectingChecker()
+    : HasState(false)
+{
+}
+
+// Accepts a range when first <= last and, if a previous range was seen, the pair (first, last)
+// does not sort before the previous pair. The range is remembered whether or not it was accepted.
+bool TIntersectingChecker::CheckNext(const TAddress& first, const TAddress& last) {
+    const bool wellOrdered = !(first > last);
+
+    bool sortedAfterPrevious = true;
+    if (HasState) {
+        const bool startsEarlier = first < PrevFirst;
+        const bool sameStartEndsEarlier = first == PrevFirst && last < PrevLast;
+        sortedAfterPrevious = !(startsEarlier || sameStartEndsEarlier);
+    }
+
+    PrevFirst = first;
+    PrevLast = last;
+    HasState = true;
+
+    return wellOrdered && sortedAfterPrevious;
+}
+
+}
diff --git a/library/cpp/ipreg/checker.h b/library/cpp/ipreg/checker.h
new file mode 100644
index 0000000000..1a04e62e77
--- /dev/null
+++ b/library/cpp/ipreg/checker.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include "address.h"
+
+namespace NIPREG {
+
+// Interface for validating a sequence of address ranges, fed one range at a time.
+class TChecker {
+public:
+    virtual ~TChecker() = default;
+
+    // Returns whether the given range is acceptable after the ranges seen so far.
+    virtual bool CheckNext(const TAddress& first, const TAddress& last) = 0;
+
+    // Throwing flavour of CheckNext().
+    void CheckNextFatal(const TAddress& first, const TAddress& last);
+};
+
+// Checker that remembers the last address of the previous range.
+class TFlatChecker: public TChecker {
+private:
+    TAddress PrevLast;
+    bool HasState; // false until the first range has been seen
+
+public:
+    TFlatChecker();
+    // `override` lets the compiler verify that this really overrides TChecker::CheckNext.
+    bool CheckNext(const TAddress& first, const TAddress& last) override;
+};
+
+// Checker that remembers both ends of the previous range.
+class TIntersectingChecker: public TChecker {
+private:
+    TAddress PrevFirst;
+    TAddress PrevLast;
+    bool HasState; // false until the first range has been seen
+
+public:
+    TIntersectingChecker();
+    // `override` lets the compiler verify that this really overrides TChecker::CheckNext.
+    bool CheckNext(const TAddress& first, const TAddress& last) override;
+};
+
+}
diff --git a/library/cpp/ipreg/merge.cpp b/library/cpp/ipreg/merge.cpp
new file mode 100644
index 0000000000..d31e9dce5d
--- /dev/null
+++ b/library/cpp/ipreg/merge.cpp
@@ -0,0 +1,69 @@
+#include "merge.h"
+
+namespace NIPREG {
+
+// Walks two range readers in parallel and calls proc for consecutive address intervals that
+// together span TAddress::Lowest() .. TAddress::Highest().
+// For every interval, proc receives its bounds plus a pointer to the data of the range of `a`
+// and of `b` that covers it; a pointer is nullptr when that reader has no range there.
+void MergeIPREGS(TReader &a, TReader& b, std::function<void(const TAddress& first, const TAddress& last, const TString *a, const TString *b)>&& proc) {
+    bool hasA = a.Next();
+    bool hasB = b.Next();
+
+    TAddress top = TAddress::Lowest();
+    TAddress bottom;
+
+    do {
+        // tweak ranges we've passed
+        if (hasA && top > a.Get().Last)
+            hasA = a.Next();
+        if (hasB && top > b.Get().Last)
+            hasB = b.Next();
+
+        if (!hasA && !hasB) {
+            // both rangesets have ended
+            bottom = TAddress::Highest();
+            proc(top, bottom, nullptr, nullptr);
+            break;
+        }
+
+        const bool inA = hasA && a.Get().First <= top;
+        const bool inB = hasB && b.Get().First <= top;
+
+        if (!hasA) {
+            // rangeset a has ended
+            if (inB) {
+                bottom = b.Get().Last;
+                proc(top, bottom, nullptr, &b.Get().Data);
+            } else {
+                bottom = b.Get().First.Prev();
+                proc(top, bottom, nullptr, nullptr);
+            }
+        } else if (!hasB) {
+            // rangeset b has ended
+            if (inA) {
+                bottom = a.Get().Last;
+                proc(top, bottom, &a.Get().Data, nullptr);
+            } else {
+                bottom = a.Get().First.Prev();
+                proc(top, bottom, nullptr, nullptr);
+            }
+        } else if (inA && inB) {
+            // inside both ranges
+            bottom = Min(a.Get().Last, b.Get().Last);
+            proc(top, bottom, &a.Get().Data, &b.Get().Data);
+        } else if (inA) {
+            // only in range a
+            bottom = Min(a.Get().Last, b.Get().First.Prev());
+            proc(top, bottom, &a.Get().Data, nullptr);
+        } else if (inB) {
+            // only in range b
+            bottom = Min(b.Get().Last, a.Get().First.Prev());
+            proc(top, bottom, nullptr, &b.Get().Data);
+        } else {
+            // outside both ranges: the gap ends right before whichever range starts first.
+            // Both readers must be consulted here; looking only at `a` would let the gap
+            // run over the beginning of b's next range.
+            bottom = Min(a.Get().First.Prev(), b.Get().First.Prev());
+            proc(top, bottom, nullptr, nullptr);
+        }
+
+        top = bottom.Next();
+    } while (bottom != TAddress::Highest());
+}
+
+}
diff --git a/library/cpp/ipreg/merge.h b/library/cpp/ipreg/merge.h
new file mode 100644
index 0000000000..123b88276c
--- /dev/null
+++ b/library/cpp/ipreg/merge.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include "reader.h"
+
+#include <functional>
+
+namespace NIPREG {
+
+// Merges the range sequences of two readers; proc is called per address interval with the
+// interval bounds and pointers to the data of `a` and `b` (nullptr where a reader has no range).
+void MergeIPREGS(TReader &a, TReader& b, std::function<void(const TAddress& first, const TAddress& last, const TString *a, const TString *b)>&& proc);
+
+}
diff --git a/library/cpp/ipreg/range.cpp b/library/cpp/ipreg/range.cpp
new file mode 100644
index 0000000000..1b90022482
--- /dev/null
+++ b/library/cpp/ipreg/range.cpp
@@ -0,0 +1,198 @@
+#include "range.h"
+
+#include "util_helpers.h"
+
+#include <library/cpp/int128/int128.h>
+#include <util/generic/maybe.h>
+#include <util/string/split.h>
+#include <util/string/vector.h>
+
+#include <stdexcept>
+
+namespace NIPREG {
+
+namespace {
+    // Address format used by the stream operator<< for TRange;
+    // switched by SetIpFullOutFormat() / SetIpShortOutFormat().
+    EAddressFormat CurrentFormat = EAddressFormat::SHORT_IPV6;
+
+    // Always throws: reports a line that does not match the expected range syntax.
+    void throwExceptionWithFormat(const TString& line) {
+        throw yexception() << "wanted format: ${ip-begin}-${ip-end}[\t${data}]; $input := '" << line << "'";
+    }
+
+    // Throws std::runtime_error when the bounds are given in descending order.
+    void throwIfReverseOrder(TAddress first, TAddress last) {
+        if (!(first > last)) {
+            return;
+        }
+        const TString firstText = first.AsIPv6();
+        const TString lastText = last.AsIPv6();
+        const TString description = "reverse order of addresses (first / last) => " + firstText + " / " + lastText;
+        throw std::runtime_error(description.data());
+    }
+} // anon-ns
+
+// Builds a range from explicit inclusive bounds and attached data.
+// Throws std::runtime_error (via throwIfReverseOrder) when first > last.
+TRange::TRange(TAddress first, TAddress last, const TString& data)
+ : First(first)
+ , Last(last)
+ , Data(data)
+{
+ throwIfReverseOrder(First, Last);
+}
+
+// Builds a range spanning [net.begin, net.end]; delegates to the bounds constructor.
+TRange::TRange(const TNetwork& net, const TString& data)
+ : TRange(net.begin, net.end, data)
+{
+}
+
+// Number of addresses in the range, counting both First and Last (distance + 1).
+ui128 TRange::GetAddrsQty() const {
+ return TAddress::Distance(First, Last) + 1;
+}
+
+// Parses one text line into a TRange.
+//
+// The line is split by any character of dataDelim (empty pieces skipped): the first
+// piece is the address part, the second (if there are exactly two pieces) is the data.
+// An address part containing '/' is handed to TNetwork; otherwise it must consist of
+// two addresses separated by ' ', '-' or tab.
+// Unless isEmptyData is set, a dash-style range must carry non-empty data.
+// Malformed input is reported through throwExceptionWithFormat().
+TRange TRange::BuildRange(const TString& line, bool isEmptyData, const TString& dataDelim) {
+    const TVector<TString> fields = StringSplitter(line).SplitBySet(dataDelim.data()).SkipEmpty();
+    if (fields.empty()) {
+        throwExceptionWithFormat(line);
+    }
+
+    const bool hasDataField = (2 == fields.size());
+    const TString& addrField = fields[0];
+
+    // network notation: data is optional here regardless of isEmptyData
+    if (addrField.find('/') != TString::npos) {
+        const TString netData = hasDataField ? fields[1] : TString("");
+        return TRange(TNetwork(addrField), netData);
+    }
+
+    const TVector<TString> bounds = StringSplitter(addrField).SplitBySet(" -\t").SkipEmpty();
+    const bool boundsOk = (2 == bounds.size()) && !bounds[0].empty() && !bounds[1].empty();
+    if (!boundsOk) {
+        throwExceptionWithFormat(line);
+    }
+
+    const bool dataPresent = hasDataField && !fields[1].empty();
+    if (!isEmptyData && !dataPresent) {
+        throwExceptionWithFormat(line);
+    }
+
+    const TString rangeData = hasDataField ? fields[1] : TString("");
+    return TRange(TAddress::ParseAny(bounds[0]), TAddress::ParseAny(bounds[1]), rangeData);
+}
+
+// True when `range` lies entirely inside this range (bounds inclusive).
+bool TRange::Contains(const TRange& range) const {
+ return First <= range.First && range.Last <= Last;
+}
+
+// True when `ip` falls within [First, Last].
+bool TRange::Contains(const TAddress& ip) const {
+ return First <= ip && ip <= Last;
+}
+
+// Switches the format used by operator<<(IOutputStream&, TRange) to EAddressFormat::IPV6.
+void SetIpFullOutFormat() {
+ CurrentFormat = EAddressFormat::IPV6;
+}
+
+// Switches the format used by operator<<(IOutputStream&, TRange) to EAddressFormat::SHORT_IPV6
+// (the initial setting).
+void SetIpShortOutFormat() {
+ CurrentFormat = EAddressFormat::SHORT_IPV6;
+}
+
+// Writes "<first>-<last>" in the requested address format, followed by a tab and Data
+// when withData is set. No trailing newline is written.
+void TRange::DumpTo(IOutputStream& output, bool withData, EAddressFormat format) const {
+ output << First.Format(format) << '-' << Last.Format(format);
+ if (withData) {
+ output << '\t' << Data;
+ }
+}
+
+// True when both bounds report address type 6.
+bool TRange::IsIpv6Only() const {
+ return 6 == First.GetType() && 6 == Last.GetType();
+}
+
+// True when both bounds report address type 4.
+bool TRange::IsIpv4Only() const {
+ return 4 == First.GetType() && 4 == Last.GetType();
+}
+
+// True when First and Last share the same upper 64 bits.
+bool TRange::IsRangeInSingleNet64() const {
+ return First.GetHigh64() == Last.GetHigh64();
+}
+
+// Builds a range that starts at MakeNet64Prefix(range.First). The end is MakeNet64Broadcast of
+// range.Last when the range sits in one /64, otherwise of range.Last.GetPrevNet64().
+// `prefix` is accepted for interface symmetry but ignored.
+// NOTE(review): presumably the helpers round to /64 network boundaries - confirm in address.h.
+TRange TRange::BuildRangeByFirst(const TRange& range, int prefix) {
+ Y_UNUSED(prefix);
+ return TRange(TAddress::MakeNet64Prefix(range.First),
+ TAddress::MakeNet64Broadcast(range.IsRangeInSingleNet64() ? range.Last : range.Last.GetPrevNet64()) ,
+ range.Data
+ );
+}
+
+// Builds a range that keeps range.First and ends at MakeNet64Broadcast(range.Last.GetPrevNet64()).
+// `prefix` is ignored. The TRange constructor throws if the computed end precedes range.First.
+TRange TRange::BuildRangeByLast(const TRange& range, int prefix) {
+ Y_UNUSED(prefix);
+ const auto prevLast = TAddress::MakeNet64Broadcast(range.Last.GetPrevNet64());
+ return TRange(range.First, prevLast, range.Data);
+// const auto prevLast = TAddress::MakeNet64Broadcast(range.Last);
+// return TRange(TAddress::MakeNet64Prefix(range.First), prevLast, range.Data);
+}
+
+// Splits a range into up to three pieces aligned with the MakeNet64Prefix/MakeNet64Broadcast
+// helpers: the piece holding the first address, an optional middle piece, and the piece
+// holding the last address.
+//
+// origRange   - range to split; returned unchanged when its Last is <= "2000::".
+// addOrigSize - when set, each produced piece gets an "orig_net_size" attribute added to its
+//               data via AddJsonAttrs (decimal count, or "huge" when the count is >= 1 << 30).
+// maskLen     - ignored.
+TVector<TRange> SplitRangeNets(const TRange& origRange, bool addOrigSize, int maskLen) {
+ Y_UNUSED(maskLen);
+
+ static const auto firstCheckedIpv6Prefix = TAddress::ParseAny("2000::");
+
+ // Textual size of a range: exact number below 1 << 30 addresses, "huge" otherwise.
+ const auto& CalcNetSize = [&](const TRange& range) {
+ static const auto MAX_FOR_DIGITS_ANSWER = ui128{1 << 30};
+ const auto netSize = range.GetAddrsQty();
+ return (netSize < MAX_FOR_DIGITS_ANSWER) ? ToString(netSize) : "huge";
+ };
+
+ // Attaches the size of origAddrRange (the part of the original range covered by the piece).
+ const auto& AddSizeField = [&](TRange& changedRange, const TRange& origAddrRange) {
+ if (addOrigSize) {
+ changedRange.Data = AddJsonAttrs({"orig_net_size"}, changedRange.Data, TMaybe<TString>(CalcNetSize(origAddrRange)));
+ }
+ };
+
+ // Ranges ending at or below 2000:: are passed through untouched.
+ if (origRange.Last <= firstCheckedIpv6Prefix) {
+ return {origRange};
+ }
+
+ // Both ends share the upper 64 bits: a single widened piece is enough.
+ if (origRange.IsRangeInSingleNet64()) {
+ TRange theOne{
+ TAddress::MakeNet64Prefix(origRange.First),
+ TAddress::MakeNet64Broadcast(origRange.Last),
+ origRange.Data
+ };
+ AddSizeField(theOne, origRange);
+ return {theOne};
+ }
+
+ // `range` is a working copy whose First is advanced as pieces are cut off.
+ TRange range{origRange};
+ TVector<TRange> result; {
+ // 1st
+ TRange byFirst{TAddress::MakeNet64Prefix(range.First),TAddress::MakeNet64Broadcast(range.First), range.Data};
+ AddSizeField(byFirst, {range.First, byFirst.Last, ""});
+ result.push_back(byFirst);
+
+ // maybe 2nd
+ range.First = byFirst.Last.Next();
+ if (!range.IsRangeInSingleNet64()) {
+ const TAddress lastPrefix = TAddress::MakeNet64Prefix(range.Last);
+
+ TRange inTheMiddle{TAddress::MakeNet64Prefix(range.First), lastPrefix.Prev(), range.Data};
+ AddSizeField(inTheMiddle, inTheMiddle);
+ result.push_back(inTheMiddle);
+
+ range.First = lastPrefix;
+ }
+
+ // the last
+ TRange byLast{range.First, TAddress::MakeNet64Broadcast(range.Last), range.Data};
+ AddSizeField(byLast, {byLast.First, range.Last, ""});
+ result.push_back(byLast);
+ }
+ return result;
+}
+
+// Ranges compare equal when their bounds match; Data is deliberately not compared.
+bool operator==(const TRange& lhs, const TRange& rhs) {
+ return lhs.First == rhs.First && lhs.Last == rhs.Last;
+}
+
+} // ns IPREG
+
+// Reads one line from the stream and parses it with TRange::BuildRange (default arguments,
+// i.e. data is required for dash-style ranges and the delimiter is a tab).
+// Throws std::runtime_error when no line can be read.
+IInputStream& operator>>(IInputStream& input, NIPREG::TRange& range) {
+ TString line;
+ if (!input.ReadLine(line)) {
+ throw std::runtime_error("unable to load data from stream");
+ }
+ range = NIPREG::TRange::BuildRange(line);
+ return input;
+}
+
+// Writes the range with its data in the currently selected address format, followed by '\n'.
+IOutputStream& operator<<(IOutputStream& output, const NIPREG::TRange& range) {
+ range.DumpTo(output, true, NIPREG::CurrentFormat);
+ output << "\n";
+ return output;
+}
diff --git a/library/cpp/ipreg/range.h b/library/cpp/ipreg/range.h
new file mode 100644
index 0000000000..15b2c693b0
--- /dev/null
+++ b/library/cpp/ipreg/range.h
@@ -0,0 +1,50 @@
+#pragma once
+
+#include "address.h"
+
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+#include <util/stream/input.h>
+#include <util/stream/output.h>
+
+#include <stdexcept>
+
+namespace NIPREG {
+
+// Inclusive address range [First, Last] with an attached free-form data string.
+struct TRange {
+ TAddress First;
+ TAddress Last;
+ TString Data;
+
+ TRange() = default;
+ // Throws std::runtime_error when first > last.
+ TRange(TAddress first, TAddress last, const TString& data);
+ TRange(const TNetwork& net, const TString& data);
+
+ // Number of addresses in the range, both bounds included.
+ ui128 GetAddrsQty() const;
+ // Writes "<first>-<last>[\t<data>]" without a trailing newline.
+ void DumpTo(IOutputStream& output, bool withData = true, EAddressFormat format = EAddressFormat::SHORT_IP) const;
+
+ // Parses a text line; see the definition for the accepted syntax.
+ static TRange BuildRange(const TString& line, bool isEmptyData = false, const TString& dataDelim = "\t");
+ bool Contains(const TRange& range) const;
+ bool Contains(const TAddress& ip) const;
+
+ // NOTE: `prefix` is currently ignored by both builders.
+ static TRange BuildRangeByFirst(const TRange& range, int prefix = 64);
+ static TRange BuildRangeByLast(const TRange& range, int prefix = 64);
+
+ bool IsIpv6Only() const;
+ bool IsIpv4Only() const;
+
+ // True when First and Last share the same upper 64 bits.
+ bool IsRangeInSingleNet64() const;
+};
+using TGenericEntry = TRange;
+
+void SetIpFullOutFormat();
+void SetIpShortOutFormat();
+
+TVector<TRange> SplitRangeNets(const TRange& range, bool addOrigSize = false, int maskLen = 64);
+
+bool operator==(const TRange& lhs, const TRange& rhs);
+inline bool operator!=(const TRange& lhs, const TRange& rhs) { return !(lhs == rhs); }
+} // ns NIPREG
+
+IInputStream& operator>>(IInputStream& input, NIPREG::TRange& range);
+IOutputStream& operator<<(IOutputStream& output, const NIPREG::TRange& range);
diff --git a/library/cpp/ipreg/reader.cpp b/library/cpp/ipreg/reader.cpp
new file mode 100644
index 0000000000..2e4ae1b178
--- /dev/null
+++ b/library/cpp/ipreg/reader.cpp
@@ -0,0 +1,82 @@
+#include "reader.h"
+
+#include <util/stream/file.h>
+
+namespace NIPREG {
+
+namespace {
+ const TString DASH_FNAME = "-";
+}
+
+// Opens `filename` for reading; an empty name or "-" selects Cin instead of a file.
+// The opened file stream, if any, is owned by the reader.
+TReader::TReader(const TString& filename, bool isEmptyData, const TString& dataDelim)
+ : OwnedStreamPtr((filename.empty() || filename == DASH_FNAME) ? nullptr : new TFileInput(filename))
+ , Stream(OwnedStreamPtr ? *OwnedStreamPtr.Get() : Cin)
+ , IsEmptyData(isEmptyData)
+ , DataDelim(dataDelim)
+{
+}
+
+// Reads from a caller-provided stream; the stream is referenced, not owned.
+TReader::TReader(IInputStream& stream, bool isEmptyData, const TString& dataDelim)
+ : Stream(stream)
+ , IsEmptyData(isEmptyData)
+ , DataDelim(dataDelim)
+{
+}
+
+// Advances to the next line of the stream and parses it into the current entry.
+// Returns false once the stream has no more lines.
+// Throws yexception when the parsed entry has no data and empty data is not allowed.
+bool TReader::Next() {
+    TString rawLine;
+    if (!Stream.ReadLine(rawLine)) {
+        return false;
+    }
+
+    CurrentEntry = TRange::BuildRange(rawLine, IsEmptyData, DataDelim);
+
+    const bool dataMissing = CurrentEntry.Data.empty();
+    if (dataMissing && !IsEmptyData) {
+        throw yexception() << "empty data part detected for [" << rawLine << "]";
+    }
+    if (dataMissing) {
+        CurrentEntry.Data = "";
+    }
+    return true;
+}
+
+// Opens the source like TReader and eagerly reads the first entry;
+// Valid records whether that read succeeded.
+TReverseByLastIpReader::TReverseByLastIpReader(const TString& filename, bool isEmptyData, const TString& dataDelim)
+ : TParent(filename, isEmptyData, dataDelim)
+{
+ Valid = TParent::Next();
+}
+
+// Wraps a caller-provided stream and eagerly reads the first entry;
+// Valid records whether that read succeeded.
+TReverseByLastIpReader::TReverseByLastIpReader(IInputStream& stream, bool isEmptyData, const TString& dataDelim)
+ : TParent(stream, isEmptyData, dataDelim)
+{
+ Valid = TParent::Next();
+}
+
+// Drops the entry returned by the previous Get() and, when the buffered group is exhausted,
+// loads the next group. Returns false when nothing is left.
+bool TReverseByLastIpReader::Next() {
+    if (!CurrentEntries.empty()) {
+        CurrentEntries.pop_back();
+    }
+
+    const bool stillBuffered = !CurrentEntries.empty();
+    return stillBuffered || PrepareNextEntries();
+}
+
+// Returns the last buffered entry. Must only be called after Next() returned true:
+// the buffer is empty before that and back() on an empty vector is undefined.
+const TGenericEntry& TReverseByLastIpReader::Get() const {
+ return CurrentEntries.back();
+}
+
+// Buffers the run of consecutive input entries that share the same First address.
+// Since Get()/Next() consume the buffer from the back, entries of one run are handed
+// out in reverse input order. Returns false when the input is exhausted.
+bool TReverseByLastIpReader::PrepareNextEntries() {
+ if (!Valid) {
+ return false;
+ }
+
+ // The parent already holds the look-ahead entry; store it, then read the next look-ahead.
+ do {
+ CurrentEntries.push_back(TParent::Get());
+ Valid = TParent::Next();
+ } while (Valid && TParent::Get().First == CurrentEntries.back().First);
+
+ return true;
+}
+
+} // NIPREG
diff --git a/library/cpp/ipreg/reader.h b/library/cpp/ipreg/reader.h
new file mode 100644
index 0000000000..b68faedcf9
--- /dev/null
+++ b/library/cpp/ipreg/reader.h
@@ -0,0 +1,57 @@
+#pragma once
+
+#include "range.h"
+
+#include <util/generic/ptr.h>
+#include <util/generic/string.h>
+#include <util/stream/input.h>
+
+namespace NIPREG {
+
+// Line-by-line reader of range entries from a file, a stream or Cin.
+class TReader {
+public:
+ // An empty filename or "-" reads from Cin.
+ TReader(const TString& filename = "", bool isEmptyData = false, const TString& dataDelim = "\t");
+ // The stream is referenced, not owned.
+ TReader(IInputStream& stream, bool isEmptyData = false, const TString& dataDelim = "\t");
+
+ // Parses the next line; returns false at end of input.
+ virtual bool Next();
+
+ // Entry parsed by the last successful Next().
+ virtual const TGenericEntry& Get() const {
+ return CurrentEntry;
+ }
+
+ // Gives access to the underlying input stream.
+ operator IInputStream&() {
+ return Stream;
+ }
+
+ virtual ~TReader() = default;
+
+private:
+ // Set only when the reader opened a file itself; declared before Stream because
+ // Stream is initialised from it.
+ TAutoPtr<IInputStream> OwnedStreamPtr;
+ IInputStream& Stream;
+
+ // When false, entries without data are rejected.
+ bool IsEmptyData = false;
+ const TString DataDelim;
+
+ TGenericEntry CurrentEntry;
+};
+
+// Reader that groups consecutive entries sharing the same First address and returns
+// each group in reverse input order.
+// NOTE(review): the name suggests the result is ordered by descending Last within a group;
+// that holds only if the input is ordered by ascending Last - confirm with callers.
+class TReverseByLastIpReader : public TReader {
+public:
+ using TParent = TReader;
+
+ explicit TReverseByLastIpReader(const TString& filename = "", bool isEmptyData = false, const TString& dataDelim = "\t");
+ explicit TReverseByLastIpReader(IInputStream& stream, bool isEmptyData = false, const TString& dataDelim = "\t");
+
+ bool Next() override;
+
+ // Valid only after Next() returned true.
+ const TGenericEntry& Get() const override;
+
+private:
+ bool PrepareNextEntries();
+
+private:
+ // True while the parent reader holds a look-ahead entry not yet buffered.
+ bool Valid = false;
+ // Current group; consumed from the back.
+ TVector<TGenericEntry> CurrentEntries;
+};
+
+} // NIPREG
diff --git a/library/cpp/ipreg/sources.cpp b/library/cpp/ipreg/sources.cpp
new file mode 100644
index 0000000000..70e4b2a6da
--- /dev/null
+++ b/library/cpp/ipreg/sources.cpp
@@ -0,0 +1,100 @@
+#include "sources.h"
+
+#include <cstdint>
+#include <stdexcept>
+
+namespace NIPREG {
+
+const ui32 ML_COEFF_DEFAULT = 50000;
+ui32 ML_COEFFICIENT = ML_COEFF_DEFAULT;
+
+// Overrides the coefficient of a source type. Only SOURCE_ML is tunable;
+// any other type raises std::runtime_error.
+void SetCoefficient(ui32 type, ui32 value) {
+    if (SOURCE_ML != type) {
+        throw std::runtime_error("unsupported setcoeff-type");
+    }
+    ML_COEFFICIENT = value;
+}
+
+// Returns the weight coefficient of a source type.
+// Types without an explicit case (including SOURCE_UNKNOWN and the ML_INT_* ids) yield 0.
+// The SOURCE_ML value is the one last set through SetCoefficient (50000 by default).
+double GetSourceCoefficient(ui32 type) {
+ switch (type) {
+ case SOURCE_MAIL: return 1;
+ case SOURCE_PHONE: return 3;
+ case SOURCE_GEO: return 4;
+ case SOURCE_COUNTRY: return 100;
+ case SOURCE_DOMAIN_NAME: return 1;
+ case SOURCE_MANUAL: return 1;
+ case SOURCE_YANDEX_NETWORK: return 1000; // NB: in yandex_noc source weight := 10K
+ case SOURCE_SPECIAL_NETWORK: return 1000000;
+ case SOURCE_PROVIDERS: return 50;
+ case SOURCE_MAXMIND: return 4;
+ case SOURCE_UNITED_UID_YANDEX_MAPS: return 0.7;
+ case SOURCE_RELIABILITY_AROUND: return 1;
+ case SOURCE_UNITED_UID_WEATHER: return 0.9;
+ case SOURCE_UNITED_UID_YANDEX_GID: return 1;
+ case SOURCE_UNITED_UID_SEARCH_QUERY: return 1.5;
+ case SOURCE_UNITED_UID_SEARCH_IN_REG: return 2;
+ case SOURCE_BGP_ASPATH_COMMUNITY: return 10;
+ case SOURCE_ML: return ML_COEFFICIENT;
+ }
+ return 0;
+}
+
+// True for the source types that take the depth coefficient:
+// MAIL, PHONE, GEO, COUNTRY and DOMAIN_NAME.
+bool SourceWantApplyDepthCoeff(ui32 source_type) {
+    return SOURCE_MAIL == source_type
+        || SOURCE_PHONE == source_type
+        || SOURCE_GEO == source_type
+        || SOURCE_COUNTRY == source_type
+        || SOURCE_DOMAIN_NAME == source_type;
+}
+
+// Same set of source types as SourceWantApplyDepthCoeff.
+bool SourceWantApplyNetsizeCoeff(ui32 source_type) {
+ return SourceWantApplyDepthCoeff(source_type);
+}
+
+// True for the SOURCE_UNITED_UID_* source types.
+bool SourceIsHuman(ui32 source_type) {
+    return SOURCE_UNITED_UID_SEARCH_QUERY == source_type
+        || SOURCE_UNITED_UID_SEARCH_IN_REG == source_type
+        || SOURCE_UNITED_UID_WEATHER == source_type
+        || SOURCE_UNITED_UID_YANDEX_GID == source_type
+        || SOURCE_UNITED_UID_YANDEX_MAPS == source_type;
+}
+
+// Same set of source types as SourceIsHuman.
+bool SourceIsForRegionNormalize(ui32 source_type) {
+ return SourceIsHuman(source_type);
+}
+
+// True for COUNTRY, MANUAL, PROVIDERS, YANDEX_NETWORK and SPECIAL_NETWORK,
+// and for every type accepted by SourceIsHuman.
+bool SourceIsForEnoughHumanData(ui32 source_type) {
+    const bool listedExplicitly = SOURCE_COUNTRY == source_type
+        || SOURCE_MANUAL == source_type
+        || SOURCE_PROVIDERS == source_type
+        || SOURCE_YANDEX_NETWORK == source_type
+        || SOURCE_SPECIAL_NETWORK == source_type;
+
+    return listedExplicitly || SourceIsHuman(source_type);
+}
+
+// True for every source type that SourceIsHuman rejects.
+bool SourceIsForFewHumanData(ui32 source_type) {
+ return !SourceIsHuman(source_type);
+}
+
+// True for the types accepted by SourceIsHuman plus SOURCE_YANDEX_NETWORK.
+bool SourceIsForReliability(ui32 source_type) {
+ return SourceIsHuman(source_type) || SOURCE_YANDEX_NETWORK == source_type;
+}
+
+} // NIPREG
diff --git a/library/cpp/ipreg/sources.h b/library/cpp/ipreg/sources.h
new file mode 100644
index 0000000000..a517e57cb8
--- /dev/null
+++ b/library/cpp/ipreg/sources.h
@@ -0,0 +1,53 @@
+#pragma once
+
+#include <util/system/types.h>
+
+namespace NIPREG {
+
+// TODO(dieash@) make some automation/specification via enabled sources (with full list)
+enum ESourceType {
+ // TODO(dieash@) full list of known src-types in choice-region-data:
+ // https://yql.yandex-team.ru/Operations/XEo-amim9Z2_PCkcZgQ0Wu-sqXAm1K8NMPesswuPzbk=
+ SOURCE_UNKNOWN = 0, // stub
+ SOURCE_MAIL = 1 /* "MAIL" */, // ripe src
+ SOURCE_PHONE = 2 /* "PHONE" */, // ripe src
+ SOURCE_GEO = 3 /* "GEO" */, // ripe src
+ SOURCE_COUNTRY = 4 /* "COUNTRY" */, // ripe, delegated, maxmind src
+ SOURCE_DOMAIN_NAME = 5 /* "DOMAIN_NAME" */, // ripe src
+ SOURCE_MANUAL = 6 /* "MANUAL" */, // manual src
+ SOURCE_YANDEX_NETWORK = 9 /* "YANDEX_NETWORK" */, // yandex-noc src
+ SOURCE_SPECIAL_NETWORK = 10 /* "SPECIAL_NETWORK" */, // spec-net src
+ SOURCE_PROVIDERS = 15 /* "PROVIDERS" */, // ripe src
+ SOURCE_MAXMIND = 17 /* "MAXMIND" */, // maxmind src
+ SOURCE_UNITED_UID_YANDEX_MAPS = 19 /* "UNITED_UID_YANDEX_MAPS" */, // uuid src
+ SOURCE_RELIABILITY_AROUND = 20 /* "RELIABILITY_AROUND" */, // rel-around src
+ SOURCE_UNITED_UID_WEATHER = 21 /* "UNITED_UID_WEATHER" */, // uuid src
+ SOURCE_UNITED_UID_YANDEX_GID = 22 /* "UNITED_UID_YANDEX_GID" */, // uuid src
+ SOURCE_UNITED_UID_SEARCH_QUERY = 23 /* "UNITED_UID_SEARCH_QUERY" */, // uuid src
+ SOURCE_UNITED_UID_SEARCH_IN_REG = 24 /* "UNITED_UID_SEARCH_IN_REG" */, // uuid src
+ SOURCE_BGP_ASPATH_COMMUNITY = 25 /* "BGP_ASPATH_COMMUNITY" */, // bgp src // NOTA BENE: clash with https://st.yandex-team.ru/IPREG-3722#5b367ec214778c001a5a3f7c
+ SOURCE_ML_INT_26 = 26 /* "ML_INT_26" */,
+ SOURCE_ML_INT_27 = 27 /* "ML_INT_27" */,
+ SOURCE_ML_INT_28 = 28 /* "ML_INT_28" */,
+ SOURCE_ML_INT_29 = 29 /* "ML_INT_29" */,
+ SOURCE_ML_INT_30 = 30 /* "ML_INT_30" */,
+ SOURCE_ML_INT_31 = 31 /* "ML_INT_31" */,
+ SOURCE_ML_INT_32 = 32 /* "ML_INT_32" */,
+ SOURCE_ML_INT_33 = 33 /* "ML_INT_33" */,
+ SOURCE_ML_INT_34 = 34 /* "ML_INT_34" */,
+ SOURCE_PRECISE_GEO_ML = 35 /* "ML_INT_35" */,
+ SOURCE_ML = 36 /* "ML" */, // ml src
+};
+
+// Weight coefficient of a source type; 0 for types without an assigned weight.
+double GetSourceCoefficient(ui32 type);
+// Predicates classifying source types; see sources.cpp for the exact sets.
+bool SourceWantApplyDepthCoeff(ui32 source_type);
+bool SourceWantApplyNetsizeCoeff(ui32 source_type);
+bool SourceIsHuman(ui32 source_type);
+// NOTE(review): sources.cpp contains no definition for this function - confirm it is
+// defined elsewhere, otherwise any use will fail at link time.
+bool SourceExcludeFromReliability(ui32 source_type);
+bool SourceIsForRegionNormalize(ui32 source_type);
+bool SourceIsForEnoughHumanData(ui32 source_type);
+bool SourceIsForFewHumanData(ui32 source_type);
+bool SourceIsForReliability(ui32 source_type);
+
+// Overrides the coefficient of a source type; only SOURCE_ML is supported,
+// other types raise std::runtime_error.
+void SetCoefficient(ui32 type, ui32 value);
+} // namespace NIPREG
diff --git a/library/cpp/ipreg/split.cpp b/library/cpp/ipreg/split.cpp
new file mode 100644
index 0000000000..19b7b85d51
--- /dev/null
+++ b/library/cpp/ipreg/split.cpp
@@ -0,0 +1,54 @@
+#include "split.h"
+
+#include <util/generic/list.h>
+#include <util/generic/vector.h>
+
+namespace NIPREG {
+
+// Splits possibly overlapping ranges from `reader` into consecutive sub-ranges and calls
+// proc(first, last, data) for each one, where `data` holds the Data strings of every
+// pending input range covering that sub-range.
+// The assertions below document the expectation that all pending entries start at the same
+// address; presumably the input must be ordered by First for that to hold - confirm with callers.
+void SplitIPREG(TReader &reader, std::function<void(const TAddress& first, const TAddress& last, const TVector<TString>& data)>&& proc) {
+ // Entries read so far whose ranges are not fully emitted yet.
+ TList<TGenericEntry> prevEntries;
+
+ bool end;
+ do {
+ end = !reader.Next();
+
+ // Emit everything that lies before the newly read entry (or everything at end of input).
+ while (!prevEntries.empty() && (end || prevEntries.front().First < reader.Get().First)) {
+ // find smallest common range to process
+ TAddress first = prevEntries.front().First;
+ TAddress last = end ? TAddress::Highest() : reader.Get().First.Prev();
+
+ for (const auto& entry: prevEntries)
+ last = Min(last, entry.Last);
+
+ // extract data for the range
+ TVector<TString> strings;
+ auto item = prevEntries.begin();
+ while (item != prevEntries.end()) {
+ Y_ASSERT(item->First == first);
+ strings.push_back(item->Data);
+
+ if (item->Last == last) {
+ // item completely processed, remove
+ auto victim = item;
+ item++;
+ prevEntries.erase(victim);
+ } else {
+ // item still have part of range left, update it
+ item->First = last.Next();
+ item++;
+ }
+ }
+
+ proc(first, last, strings);
+ }
+
+ if (!end) {
+ if (!prevEntries.empty()) {
+ Y_ASSERT(prevEntries.front().First == reader.Get().First);
+ }
+ prevEntries.push_back(reader.Get());
+ }
+ } while (!end);
+}
+
+}
diff --git a/library/cpp/ipreg/split.h b/library/cpp/ipreg/split.h
new file mode 100644
index 0000000000..9710ff5f6d
--- /dev/null
+++ b/library/cpp/ipreg/split.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include "reader.h"
+
+#include <util/generic/vector.h>
+
+#include <functional>
+
+namespace NIPREG {
+
+void SplitIPREG(TReader &reader, std::function<void(const TAddress& first, const TAddress& last, const TVector<TString>& data)>&& proc);
+
+}
diff --git a/library/cpp/ipreg/stopwatch.cpp b/library/cpp/ipreg/stopwatch.cpp
new file mode 100644
index 0000000000..31d99d2758
--- /dev/null
+++ b/library/cpp/ipreg/stopwatch.cpp
@@ -0,0 +1,53 @@
+#include "stopwatch.h"
+
+#include <util/stream/str.h>
+
+namespace NIPREG {
+
+// Records the construction time as the start of the overall measurement.
+TStopWatch::TStopWatch() {
+ Start = TInstant::Now();
+}
+
+// Finishes a still-running task and prints the total elapsed time to Cerr.
+// Exceptions are swallowed so that the destructor never throws.
+TStopWatch::~TStopWatch() {
+ try {
+ if (TaskRunning)
+ StopTask();
+
+ Cerr << "Everything done in " << FormatTime(TInstant::Now() - Start) << Endl;
+ } catch (...) {
+ // not much problem if we can't write the summary
+ }
+}
+
+// Starts timing a new numbered task and announces it on Cerr.
+// A task that is still running is stopped (and reported) first.
+void TStopWatch::StartTask(const TString& message) {
+ StopTask();
+
+ ++TaskOrdNum;
+ TaskStart = TInstant::Now();
+ TaskRunning = true;
+ Cerr << TaskOrdNum << ". " << message << "...\n";
+}
+
+// Reports the duration of the running task on Cerr; does nothing when no task is running.
+void TStopWatch::StopTask() {
+    if (!TaskRunning) {
+        return;
+    }
+
+    const TDuration elapsed = TInstant::Now() - TaskStart;
+    Cerr << "Done in " << FormatTime(elapsed) << Endl;
+    TaskRunning = false;
+}
+
+// Renders a duration as "<s>s", "<m>m <s>s" or "<h>h <m>m" depending on its magnitude
+// (whole seconds only; seconds are dropped once the duration reaches an hour).
+TString TStopWatch::FormatTime(const TDuration& dur) {
+    const auto totalSeconds = dur.Seconds();
+
+    TStringStream text;
+    if (totalSeconds >= 3600) {
+        text << totalSeconds / 3600 << "h " << (totalSeconds / 60) % 60 << "m";
+    } else if (totalSeconds >= 60) {
+        text << totalSeconds / 60 << "m " << totalSeconds % 60 << "s";
+    } else {
+        text << totalSeconds << "s";
+    }
+
+    return text.Str();
+}
+
+}
diff --git a/library/cpp/ipreg/stopwatch.h b/library/cpp/ipreg/stopwatch.h
new file mode 100644
index 0000000000..0873a638f6
--- /dev/null
+++ b/library/cpp/ipreg/stopwatch.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include <util/datetime/base.h>
+
+namespace NIPREG {
+
+// Simple progress timer: reports per-task durations and, on destruction,
+// the total lifetime of the object to Cerr.
+class TStopWatch {
+private:
+ TInstant Start; // construction time
+ TInstant TaskStart; // start of the task currently running
+ bool TaskRunning = false;
+ ui32 TaskOrdNum = 0; // ordinal of the last started task
+
+private:
+ TString FormatTime(const TDuration& dur);
+
+public:
+ TStopWatch();
+ ~TStopWatch();
+
+ // Stops the running task, if any, and starts a new one.
+ void StartTask(const TString& message);
+ // No-op when no task is running.
+ void StopTask();
+};
+
+}
diff --git a/library/cpp/ipreg/util_helpers.cpp b/library/cpp/ipreg/util_helpers.cpp
new file mode 100644
index 0000000000..1b64baef55
--- /dev/null
+++ b/library/cpp/ipreg/util_helpers.cpp
@@ -0,0 +1,705 @@
+#include "util_helpers.h"
+
+#include <library/cpp/ipreg/reader.h>
+
+#include <library/cpp/json/json_reader.h>
+#include <library/cpp/json/json_value.h>
+#include <library/cpp/json/json_writer.h>
+
+#include <library/cpp/geobase/lookup.hpp>
+
+#include <util/generic/ptr.h>
+#include <util/generic/vector.h>
+#include <util/stream/file.h>
+#include <util/stream/format.h>
+#include <util/string/split.h>
+#include <util/string/vector.h>
+#include <util/stream/str.h>
+
+namespace NIPREG {
+ namespace {
+ // Picks the rounding step for a value: the step of the first bucket whose upper
+ // bound is not below baseValue, or the step of the last bucket when none matches.
+ double FindNearestCoarsedCoeff(double baseValue) {
+     using ValueStepPair = std::pair<double, double>;
+     static const double fix = 0.01;
+     static const TVector<ValueStepPair> limits = {
+         { 100., 20. + fix },
+         { 500., 50. + fix },
+         { 2500., 100. + fix },
+         { 10000., 1000. + fix },
+         { 50000., 10000. + fix }
+     };
+
+     // Walk forward while the value exceeds the bucket bound, stopping at the last bucket.
+     size_t bucket = 0;
+     while (bucket + 1 < limits.size() && !(baseValue <= limits[bucket].first)) {
+         ++bucket;
+     }
+     return limits[bucket].second;
+ }
+
+ // Rounds baseValue to the nearest multiple of the step chosen by FindNearestCoarsedCoeff.
+ // Throws yexception for negative input.
+ double CalcCoarsedValue(double baseValue) {
+ if (baseValue < 0.) {
+ ythrow yexception() << "negative value detected: " << baseValue;
+ }
+
+ // TODO(dieash) some "strange" calculation below
+ const auto coarsedCoeff = FindNearestCoarsedCoeff(baseValue);
+ // Adding half a step before the truncating cast gives round-to-nearest.
+ const double fixedValue = coarsedCoeff * static_cast<int>((baseValue + coarsedCoeff / 2) / coarsedCoeff);
+ return fixedValue;
+ }
+
+ const char * const REL_FIELD = "reliability";
+ const char * const REG_FIELD = "region_id";
+
+ // Parses `data` into jsonData and replaces its "reliability" field with the coarsened value.
+ // GetMapSafe() is expected to reject non-object JSON - confirm against the json library.
+ void CorrectReliability(NJson::TJsonValue& jsonData, const TString& data) {
+ jsonData = ParseJsonString(data);
+ auto& jsonMap = jsonData.GetMapSafe();
+
+ auto& reliabilityField = jsonMap[REL_FIELD];
+ reliabilityField = CalcCoarsedValue(reliabilityField.GetDouble());
+ }
+
+ // Parses the JSON text and re-serialises it through SortJsonData.
+ TString SortJson(const TString& data) {
+ NJson::TJsonValue json = ParseJsonString(data);
+ return SortJsonData(json);
+ }
+
+ // Merges two JSON objects given as text: every top-level field of data2 is inserted into
+ // data1 (presumably overwriting fields with the same key - confirm InsertValue semantics).
+ // With countMerge set, the "_mrg_qty_" counter is seeded with 1 when data1 lacks it and
+ // incremented after the merge; note that a counter present in data2 replaces it before
+ // the increment.
+ static TString MergeJsonsData(const TString& data1, const TString& data2, bool sortKeys = false, bool countMerge = false) {
+ static const char* MERGE_QTY = "_mrg_qty_";
+
+ auto json1 = ParseJsonString(data1);
+ const auto& json2 = ParseJsonString(data2);
+
+ if (countMerge && !json1.Has(MERGE_QTY)) {
+ json1.InsertValue(MERGE_QTY, 1);
+ }
+
+ for (const auto& item : json2.GetMapSafe()) {
+ json1.InsertValue(item.first, item.second);
+ }
+
+ if (countMerge) {
+ json1.InsertValue(MERGE_QTY, (json1[MERGE_QTY].GetInteger() + 1));
+ }
+
+ const auto NoFormat = false;
+ return NJson::WriteJson(json1, NoFormat, sortKeys);
+ }
+
+ // Compares two JSON texts while ignoring the listed top-level fields.
+ // With an empty exclusion list the raw strings are compared byte-for-byte (no parsing),
+ // so differently formatted but equivalent JSON is reported as different in that case.
+ bool IsJsonEquals(const TVector<TString>& excludeFieldsList, const TString& data1, const TString& data2) {
+ if (excludeFieldsList.empty()) {
+ return data1 == data2;
+ }
+
+ auto json1 = ParseJsonString(data1);
+ auto json2 = ParseJsonString(data2);
+
+ for (const auto& excludeField : excludeFieldsList) {
+ json1.EraseValue(excludeField);
+ json2.EraseValue(excludeField);
+ }
+
+ return json1 == json2;
+ }
+
+ // Applies a stream of patch ranges to a stream of base ranges and writes the result to Output.
+ // Parts covered by only one stream are copied as they are; on overlapping parts the data of
+ // both ranges is merged with MergeJsonsData(base, patch).
+ class Patcher {
+ public:
+ // Reads the first range of each stream; a null pointer marks an exhausted stream.
+ Patcher(TReader& base, TReader& patch, IOutputStream& output, bool sortData)
+ : BaseStream(base)
+ , PatchStream(patch)
+ , Output(output)
+ , SortData(sortData)
+ {
+ GetNext(BaseStream, BaseRangePtr);
+ GetNext(PatchStream, PatchRangePtr);
+ }
+
+ // Runs until both streams are exhausted. Each iteration applies the first handler that
+ // fires; the order matters: the OnlySecond() calls handle a null side, so the handlers
+ // after them can dereference both pointers.
+ void Process() {
+ while (BaseRangePtr || PatchRangePtr) {
+ if ( CheckPatch()
+ || OnlySecond(BaseRangePtr, PatchRangePtr, PatchStream)
+ || OnlySecond(PatchRangePtr, BaseRangePtr, BaseStream)
+ || Range1BeforeRange2(BaseRangePtr, PatchRangePtr, BaseStream)
+ || Range1BeforeRange2(PatchRangePtr, BaseRangePtr, PatchStream)
+ || FirstEndInSecond(BaseRangePtr, PatchRangePtr)
+ || FirstEndInSecond(PatchRangePtr, BaseRangePtr)
+ || FirstStartInSecond(BaseRangePtr, PatchRangePtr, BaseStream, PatchStream))
+ {
+ continue;
+ }
+ }
+ }
+
+ private:
+ // Loads the next range of `stream` into rangePtr, or resets it at end of stream.
+ void GetNext(TReader& stream, TAutoPtr<TRange>& rangePtr) {
+ if (stream.Next()) {
+ if (rangePtr) {
+ *rangePtr = stream.Get();
+ } else {
+ rangePtr.Reset(new TRange(stream.Get()));
+ }
+ }
+ else {
+ rangePtr.Reset();
+ }
+ }
+
+ void Print(const TRange& range) const {
+ Output << range;
+ }
+
+ // Prints the range with its JSON data re-serialised through SortJson.
+ void PrintSorted(const TRange& range) const {
+ const TRange sortedCopy{range.First, range.Last, SortJson(range.Data)};
+ Output << sortedCopy;
+ }
+
+ // Skips a patch range that has become empty (First > Last), which happens after
+ // FirstStartInSecond() moved its First past its Last.
+ bool CheckPatch() {
+ if (PatchRangePtr && PatchRangePtr->First > PatchRangePtr->Last) {
+ GetNext(PatchStream, PatchRangePtr);
+ return true;
+ }
+ return false;
+ }
+
+ // `first` is exhausted: copy `second` to the output and advance its stream.
+ bool OnlySecond(TAutoPtr<TRange>& first, TAutoPtr<TRange>& second, TReader& stream) {
+ if (!first && second) {
+ Print(*second);
+ GetNext(stream, second);
+ return true;
+ }
+ return false;
+ }
+
+ // `first` ends before `second` starts: copy `first` and advance its stream.
+ bool Range1BeforeRange2(TAutoPtr<TRange>& first, TAutoPtr<TRange>& second, TReader& stream) {
+ if (first->Last < second->First) {
+ Print(*first);
+ GetNext(stream, first);
+ return true;
+ }
+ return false;
+ }
+
+ // `first` starts earlier than `second`: emit the non-overlapping head of `first`
+ // and trim `first` so that both ranges start at the same address.
+ bool FirstEndInSecond(TAutoPtr<TRange>& first, TAutoPtr<TRange>& second) {
+ if (first->First < second->First) {
+ auto leftBaseRange = *first;
+ leftBaseRange.Last = second->First.Prev();
+ Print(leftBaseRange);
+
+ first->First = second->First;
+ return true;
+ }
+ return false;
+ }
+
+ // Overlap starting at first->First: emit the common part with merged data, then
+ // advance whichever range ended and trim the other one.
+ bool FirstStartInSecond(TAutoPtr<TRange>& first, TAutoPtr<TRange>& second, TReader& stream1, TReader& stream2) {
+ if (first->First >= second->First) {
+ auto leftBaseRange = *first;
+ leftBaseRange.Data = MergeJsonsData(first->Data, second->Data);
+
+ if (first->Last <= second->Last) {
+ second->First = first->Last.Next();
+ GetNext(stream1, first);
+ // NOTE(review): presumably guards the case where Next() saturates at the highest
+ // address - confirm against TAddress::Next().
+ if (second->First == TAddress::Highest()) {
+ GetNext(stream2, second);
+ }
+ } else {
+ leftBaseRange.Last = second->Last;
+ first->First = second->Last.Next();
+ GetNext(stream2, second);
+ }
+
+ SortData ? PrintSorted(leftBaseRange) : Print(leftBaseRange);
+ return true;
+ }
+ return false;
+ }
+
+ private:
+ // Current range of each stream; null once the stream is exhausted.
+ TAutoPtr<TRange> BaseRangePtr;
+ TAutoPtr<TRange> PatchRangePtr;
+
+ TReader& BaseStream;
+ TReader& PatchStream;
+ IOutputStream& Output;
+ const bool SortData = false;
+ };
+
+ // Ordering checks for consecutive addresses; a violated check raises std::runtime_error
+ // that names the offending row and both addresses.
+ struct IpChecker {
+     // Requires lastIp <= checkedIp.
+     static void LessOrEqual(const size_t row, const TAddress& lastIp, const TAddress& checkedIp) {
+         if (!(lastIp <= checkedIp)) {
+             GenErr(row, " <= ", lastIp, checkedIp);
+         }
+     }
+
+     // Requires lastIp < checkedIp.
+     static void Less(const size_t row, const TAddress& lastIp, const TAddress& checkedIp) {
+         if (!(lastIp < checkedIp)) {
+             GenErr(row, " < ", lastIp, checkedIp);
+         }
+     }
+
+     // Builds the error text ">>> row#<row>; <lastIp><msg><checkedIp>" and throws.
+     static void GenErr(const size_t row, const char* msg, const TAddress& lastIp, const TAddress& checkedIp) {
+         const TString rowText = ToString(row);
+         const TString errMsg = ">>> row#" + rowText + "; " + lastIp.AsIPv6() + msg + checkedIp.AsIPv6();
+         throw std::runtime_error(errMsg.data());
+     }
+ };
+
+ // Streams ranges through a sliding window of three consecutive entries. When the two outer
+ // entries have the same region and the middle entry's region is found among the parents of
+ // that region, the middle entry's region_id is replaced by the outer one.
+ class MergerBy3 {
+ public:
+ MergerBy3(const TString& geodataPath, IOutputStream& output)
+ : Geobase(geodataPath)
+ , Out(output)
+ {}
+
+ // input      - source of ranges
+ // ByRegsOnly - when false, the substitution is skipped if the middle range holds more
+ //              addresses than both neighbours together
+ // silentMode - when false, every substitution is logged to Cerr
+ void Process(TReader& input, bool ByRegsOnly, bool silentMode) {
+ while (input.Next()) {
+ Trio.push_back(input.Get());
+ if (3 > Trio.size()) {
+ continue;
+ }
+
+ // A placeholder in the middle is never rewritten: flush it together with the first range.
+ auto& range2Data = (++Trio.begin())->Data;
+ if (range2Data.npos != range2Data.find("\"is_placeholder\":1")) {
+ PrintAndDrop1stRange();
+ PrintAndDrop1stRange();
+ continue;
+ }
+
+ const auto range1RegId = GetRegionId(Trio.begin()->Data);
+ const auto range3RegId = GetRegionId(Trio.rbegin()->Data);
+ if (range1RegId != range3RegId) {
+ PrintAndDrop1stRange();
+ continue;
+ }
+
+ const auto range2RegId = GetRegionId(range2Data);
+ const auto& parentsIds = Geobase.GetParentsIds(range1RegId);
+ // The search skips the first element of the list.
+ // NOTE(review): begin() + 1 assumes the list is never empty - confirm GetParentsIds contract.
+ if (parentsIds.end() == std::find(parentsIds.begin() + 1, parentsIds.end(), range2RegId)) {
+ PrintAndDrop1stRange();
+ continue;
+ }
+
+ if (!ByRegsOnly) {
+ const auto range1Size = Trio.begin()->GetAddrsQty();
+ const auto range2Size = (++Trio.begin())->GetAddrsQty();
+ const auto range3Size = Trio.rbegin()->GetAddrsQty();
+
+ if (range2Size > (range1Size + range3Size)) {
+ PrintAndDrop1stRange();
+ continue;
+ }
+ }
+
+ range2Data = SubstRegionId(range2Data, range1RegId);
+ if (!silentMode) {
+ PrintSubstNote(range2RegId, range1RegId);
+ }
+
+ PrintAndDrop1stRange(); // 1st
+ PrintAndDrop1stRange(); // 2nd
+ }
+
+ // Flush whatever is left in the window.
+ while (Trio.end() != Trio.begin()) {
+ PrintAndDrop1stRange();
+ }
+ }
+ private:
+ // Writes the oldest buffered range to the output and removes it from the window.
+ void PrintAndDrop1stRange() {
+ Out << *Trio.begin();
+ Trio.erase(Trio.begin());
+ }
+
+ // Logs "s/<old>/<new>/: [<range>/<size> | <range>/<size> | <range>/<size>]" to Cerr.
+ void PrintSubstNote(const int oldId, const int newId) {
+ const bool NoData = false;
+ Cerr << "s/" << oldId << "/" << newId << "/: [";
+
+ Trio.begin()->DumpTo(Cerr, NoData);
+ Cerr << "/" << Trio.begin()->GetAddrsQty() << " | ";
+
+ const auto& range2nd = *(++Trio.begin());
+ range2nd.DumpTo(Cerr, NoData);
+ Cerr << "/" << range2nd.GetAddrsQty() << " | ";
+
+ Trio.rbegin()->DumpTo(Cerr, NoData);
+ Cerr << "/" << Trio.rbegin()->GetAddrsQty() << "]\n";
+ }
+
+
+ // Extracts "region_id" from the JSON data; the id 99999 is reported as 10000.
+ static int GetRegionId(const TString& data) {
+ const auto& json = ParseJsonString(data);
+ auto reg_id = json["region_id"].GetIntegerSafe(0);
+ return 99999 == reg_id ? 10000 : reg_id;
+ }
+
+ // Returns the JSON data with "region_id" set to newId, serialised through SortJsonData.
+ static TString SubstRegionId(const TString& data, const int newId) {
+ auto json = ParseJsonString(data);
+ json.InsertValue("region_id", newId);
+ return SortJsonData(json);
+ }
+
+ const NGeobase::TLookup Geobase;
+ IOutputStream& Out;
+ // Sliding window of at most three consecutive ranges.
+ TList<TRange> Trio;
+ };
+ } // anon-ns
+
+ // Reads "<range>\t<json>" lines and rewrites each one so that its data holds only
+ // "region_id" and the coarsened "reliability" (printed with at most two decimals).
+ //
+ // Throws yexception when a line has no tab-separated data column, when the
+ // reliability value is negative, or when the data is not a JSON object.
+ void DoCoarsening(IInputStream& input, IOutputStream& output) {
+     TString line;
+     while (input.ReadLine(line)) {
+         TVector<TString> parts;
+         StringSplitter(line).Split('\t').AddTo(&parts);
+
+         // Without this check a line lacking the data column would index past the end of `parts`.
+         if (parts.size() < 2) {
+             ythrow yexception() << "wanted format: ${range}\t${json-data}; $input := '" << line << "'";
+         }
+
+         NJson::TJsonValue jsonData;
+         CorrectReliability(jsonData, parts[1]);
+         output << parts[0] << "\t" << "{\""
+             << REG_FIELD << "\":" << jsonData[REG_FIELD] << ",\""
+             << REL_FIELD << "\":" << Prec(jsonData[REL_FIELD].GetDouble(), PREC_POINT_DIGITS_STRIP_ZEROES, 2)
+             << "}\n";
+     }
+ }
+
+ // Collapses runs of consecutive entries that cover exactly the same range into one entry.
+ //
+ // For a run with a single entry the data is passed through unchanged. For longer runs the
+ // "orig_net_size" values are accumulated per "region_id" ("huge" counts as 100) and the data
+ // of the entry that brought its region to the largest accumulated size is kept, with
+ // "orig_net_size" removed.
+ //
+ // Throws when an entry of a multi-entry run carries an "orig_net_size" that is neither
+ // "huge" nor an integer (FromString<int> fails).
+ void DoMergeEqualsRange(TReader& input, IOutputStream& output) {
+     // TODO(dieash@) may be check region for parent/child relation
+     // , const TString& geodataPath
+     // NGeobase::TLookup geoLookup(geodataPath);
+
+     TVector<TString> rangeDataList;
+     TRange lastRange{};
+
+     const char* REG_ID_ATTR = "region_id";
+     const char* ORG_NET_ATTR = "orig_net_size";
+     const char* HUGE_SIZE_VALUE = "huge";
+
+     const int HUGE_SIZE_COEFF = 100;
+
+     // Chooses the data that represents the collected run.
+     const auto CalcRegionBinding = [&]() {
+         if (rangeDataList.empty()) {
+             throw std::runtime_error("empty data list");
+         }
+
+         if (1 == rangeDataList.size()) {
+             return rangeDataList[0];
+         }
+
+         size_t maxAmount{};
+         NJson::TJsonValue maxData;
+
+         THashMap<NGeobase::TId, size_t> reg2amount;
+         for (const auto& data : rangeDataList) {
+             const auto& json = ParseJsonString(data);
+
+             const auto id = json[REG_ID_ATTR].GetInteger();
+             const auto amount = (json.Has(ORG_NET_ATTR) && HUGE_SIZE_VALUE == json[ORG_NET_ATTR].GetString()) ? HUGE_SIZE_COEFF : FromString<int>(json[ORG_NET_ATTR].GetString());
+             reg2amount[id] += amount;
+
+             // Remember the new maximum; without the update every later entry with a
+             // non-zero amount would replace the winner regardless of its size.
+             if (reg2amount[id] > maxAmount) {
+                 maxAmount = reg2amount[id];
+                 maxData = json;
+             }
+         }
+
+         maxData.EraseValue(ORG_NET_ATTR);
+         return SortJsonData(maxData);
+     };
+
+     // Emits the collected run, if any.
+     const auto PrintRow = [&]() {
+         if (rangeDataList.empty()) {
+             return;
+         }
+         lastRange.Data = CalcRegionBinding();
+         output << lastRange;
+     };
+
+     while (input.Next()) {
+         auto currRange = input.Get();
+         // TRange comparison looks at the bounds only, so differing data does not break a run.
+         if (currRange != lastRange) {
+             PrintRow();
+
+             lastRange = currRange;
+             rangeDataList = {};
+         }
+
+         rangeDataList.push_back(currRange.Data);
+     }
+     PrintRow();
+ }
+
+ // Joins consecutive entries into larger ranges and writes the result to `output`.
+ //
+ // Two entries are joined when the second starts right after the first ends and their data
+ // is considered equal: for JSON data (traits.ConcatSep empty) equality ignores
+ // traits.ExcludeFieldsList and the data is merged with MergeJsonsData; otherwise the strings
+ // must match exactly. An entry that contains the accumulated range and has identical data
+ // replaces it; with traits.JoinNestedRanges an entry nested in the accumulated range with
+ // identical data is dropped. traits.SortData normalises the JSON of every entry first.
+ void DoMerging(TReader& input, IOutputStream& output, const MergeTraits& traits) {
+ if (!input.Next()) {
+ return; // empty file here
+ }
+
+ const bool IsJsonData = traits.ConcatSep.empty();
+
+ // Range accumulated so far; written out once the next entry cannot be joined to it.
+ TRange joinedRange = input.Get();
+ if (traits.SortData) {
+ joinedRange.Data = SortJson(joinedRange.Data);
+ }
+
+ while (input.Next()) {
+ auto currRange = input.Get();
+ if (traits.SortData) {
+ currRange.Data = SortJson(currRange.Data);
+ }
+
+ if (currRange.Contains(joinedRange) && joinedRange.Data == currRange.Data) {
+ joinedRange = currRange;
+ continue;
+ }
+
+ if (traits.JoinNestedRanges && joinedRange.Contains(currRange) && joinedRange.Data == currRange.Data) {
+ continue;
+ }
+
+ if ( currRange.First != joinedRange.Last.Next()
+ || ( IsJsonData && !IsJsonEquals(traits.ExcludeFieldsList, currRange.Data, joinedRange.Data))
+ || (!IsJsonData && currRange.Data != joinedRange.Data))
+ {
+ output << joinedRange;
+ joinedRange = currRange;
+ } else {
+ if (IsJsonData) {
+ joinedRange.Data = MergeJsonsData(currRange.Data, joinedRange.Data, traits.SortData, traits.CountMerges);
+ } else {
+ // This branch is reached only when both strings are equal, so the data is kept as is.
+ joinedRange.Data = (joinedRange.Data == currRange.Data) ? joinedRange.Data : (joinedRange.Data + traits.ConcatSep + currRange.Data);
+ }
+ joinedRange.Last = currRange.Last;
+ }
+ }
+
+ output << joinedRange;
+ }
+
+ // Runs a MergerBy3 built from `geodata` and `output` over `input`, forwarding the two flags.
+ void DoMerging3(TReader& input, IOutputStream& output, const TString& geodata, bool ByRegsOnly, bool silentMode) {
+     MergerBy3(geodata, output).Process(input, ByRegsOnly, silentMode);
+ }
+
+ // Applies `patch` on top of `base` and writes the result to `output`; all the work is
+ // delegated to Patcher::Process(). `sortData` is forwarded to Patcher untouched.
+ void DoPatching(TReader& base, TReader& patch, IOutputStream& output, bool sortData) {
+ Patcher(base, patch, output, sortData).Process();
+ }
+
+ const TString STUB_DATA{"{\"is_placeholder\":1,\"region_id\":10000,\"reliability\":0}"};
+
+ // Copies the ranges from `input` to `output` and fills every hole between them, as well as
+ // before the first and after the last one, with a placeholder range carrying STUB_DATA.
+ //
+ // The input must be ordered: a range that starts before the end of the previous one raises
+ // std::runtime_error. An empty input produces one stub spanning the whole address space.
+ void AddStubRanges(TReader& input, IOutputStream& output) {
+     TRange stub{
+         TAddress::Lowest(),
+         TAddress::Lowest(),
+         STUB_DATA
+     };
+
+     // Set when the most recent range ends on the very last address. The trailing stub is
+     // decided by this flag instead of by the value of Last.Next(), so the outcome does not
+     // depend on what Next() returns at the top of the address space, and a range ending one
+     // address below the top still gets its one-address stub.
+     bool tailCovered = false;
+
+     while (input.Next()) {
+         const auto& currRange = input.Get();
+
+         if (stub.First > currRange.First) {
+             const TString& errMsg = ">>> bad ranges ($stub.begin > $next.begin) // " + stub.First.AsShortIPv6() + " | " + currRange.First.AsShortIPv6();
+             throw std::runtime_error(errMsg.data());
+         }
+
+         if (stub.First < currRange.First) {
+             // There is a hole in front of the current range: plug it.
+             stub.Last = currRange.First.Prev();
+             output << stub;
+         }
+
+         output << currRange;
+         tailCovered = (currRange.Last == TAddress::Highest());
+         stub.First = currRange.Last.Next();
+     }
+
+     if (!tailCovered) {
+         stub.Last = TAddress::Highest();
+         output << stub;
+     }
+ }
+
+ // Echoes every line of `input` to `output` while verifying that the ranges tile the whole
+ // address space: the first range must start at TAddress::Lowest(), each following range must
+ // start right after the previous one ends, and the last one must end at TAddress::Highest().
+ // Any violation (including an empty input) raises std::runtime_error.
+ void CheckAddressSpaceForCompleteness(IInputStream& input, IOutputStream& output) {
+     TAddress prevLast = TAddress::Lowest();
+     size_t rowNo = 0;
+
+     for (TString row; input.ReadLine(row);) {
+         ++rowNo;
+         output << row << "\n";
+
+         const auto& range = TRange::BuildRange(row);
+         if (rowNo == 1) {
+             if (range.First != TAddress::Lowest()) {
+                 const TString message = "bad first addr (ip / wanted_ip) => " + range.First.AsIPv6() + " / " + TAddress::Lowest().AsIPv6();
+                 throw std::runtime_error(message);
+             }
+         } else if (prevLast == range.First || prevLast.Next() != range.First) {
+             const TString message = ">>> row#" + ToString(rowNo) + " bad pair (last_ip / next_ip) => " + prevLast.AsIPv6() + " / " + range.First.AsIPv6();
+             throw std::runtime_error(message);
+         }
+
+         prevLast = range.Last;
+     }
+
+     if (prevLast != TAddress::Highest()) {
+         const TString message = "bad last addr (last_ip / wanted_ip) => " + prevLast.AsIPv6() + " / " + TAddress::Highest().AsIPv6();
+         throw std::runtime_error(message);
+     }
+ }
+
+ // Echoes every line of `input` to `output` and, starting from the second row, hands the end
+ // of the previous range and the start of the current one to IpChecker: Less() when
+ // `IsStrict` is set, LessOrEqual() otherwise.
+ void CheckRangesForMonotonicSequence(IInputStream& input, IOutputStream& output, bool IsStrict) {
+     TAddress prevLast = TAddress::Lowest();
+     size_t rowNo = 0;
+
+     for (TString text; input.ReadLine(text);) {
+         ++rowNo;
+         output << text << "\n";
+
+         const auto& range = TRange::BuildRange(text);
+         if (rowNo > 1) {
+             if (IsStrict) {
+                 IpChecker::Less(rowNo, prevLast, range.First);
+             } else {
+                 IpChecker::LessOrEqual(rowNo, prevLast, range.First);
+             }
+         }
+         prevLast = range.Last;
+     }
+ }
+
+ // Parses `data` into a JSON tree; the reader is asked to throw on malformed input.
+ NJson::TJsonValue ParseJsonString(const TString& data) {
+     NJson::TJsonValue parsed;
+     NJson::ReadJsonFastTree(data, &parsed, /* throw on error */ true);
+     return parsed;
+ }
+
+ // Serialises `json` without formatting and with the keys sorted.
+ TString SortJsonData(const NJson::TJsonValue& json) {
+     return NJson::WriteJson(json, /* format */ false, /* sort keys */ true);
+ }
+
+ // String flavour: parses `jsonStr` and re-serialises it with sorted keys.
+ TString SortJsonData(const TString& jsonStr) {
+     const NJson::TJsonValue parsed = ParseJsonString(jsonStr);
+     return SortJsonData(parsed);
+ }
+
+ // Returns `jsonStr` with every non-empty name from `addFieldsList` inserted as a key. The
+ // value is `*attrValue` when it is set and the integer 1 otherwise. With an empty list the
+ // input string is returned as is, without being parsed.
+ TString AddJsonAttrs(const TVector<TString>& addFieldsList, const TString& jsonStr, const TMaybe<TString>& attrValue) {
+     if (addFieldsList.empty()) {
+         return jsonStr;
+     }
+
+     auto doc = ParseJsonString(jsonStr);
+     for (const auto& name : addFieldsList) {
+         if (name.empty()) {
+             continue;
+         }
+         if (attrValue) {
+             doc.InsertValue(name, *attrValue);
+         } else {
+             doc.InsertValue(name, 1);
+         }
+     }
+     return doc.GetStringRobust();
+ }
+
+ // Returns `jsonStr` with every non-empty key from `excludeFieldsList` erased. With an empty
+ // list the input string is returned as is, without being parsed.
+ TString ExcludeJsonAttrs(const TVector<TString>& excludeFieldsList, const TString& jsonStr) {
+     if (excludeFieldsList.empty()) {
+         return jsonStr;
+     }
+
+     auto doc = ParseJsonString(jsonStr);
+     for (const auto& name : excludeFieldsList) {
+         if (name.empty()) {
+             continue;
+         }
+         doc.EraseValue(name);
+     }
+     return doc.GetStringRobust();
+ }
+
+ // Returns a JSON string holding only those keys of `jsonStr` that are listed in
+ // `extractFieldsList`. With an empty list the input string is returned as is; when none of
+ // the listed keys is present the result is an empty string.
+ TString ExtractJsonAttrs(const TVector<TString>& extractFieldsList, const TString& jsonStr) {
+     if (extractFieldsList.empty()) {
+         return jsonStr;
+     }
+
+     auto source = ParseJsonString(jsonStr);
+     NJson::TJsonValue picked;
+     for (const auto& name : extractFieldsList) {
+         if (!source.Has(name)) {
+             continue;
+         }
+         picked.InsertValue(name, source[name]);
+     }
+
+     if (picked.IsDefined()) {
+         return picked.GetStringRobust();
+     }
+     return {};
+ }
+
+ // Long option names and help texts for the options registered by the DefaultCliParams
+ // constructor. Each *Param constant is the option name, the matching *ParamDesc its help.
+ namespace CliParamsDesc {
+ const TString InputFnameParam = "input-data";
+ const TString OutputFnameParam = "output-data";
+ const TString OutputFullIpParam = "show-full-ip";
+ const TString PrintStatsParam = "print-stats";
+ const TString PrintYtStatsParam = "yt-stats";
+
+ const TString InputFnameParamDesc = "path to input IPREG-data; leave empty or use '-' for stdin";
+ const TString OutputFnameParamDesc = "path to file for output results; leave empty for stdout";
+ const TString OutputFullIpParamDesc = "print full ipv6 (by default - short)";
+ const TString PrintStatsParamDesc = "print internal statistics; @stderr";
+ const TString PrintYtStatsParamDesc = "print YT-stats (by default, file-descriptor 5)";
+ } // ns CliParamsDesc
+
+ // Registers the options shared by the IPREG command-line tools: no free arguments, -h for
+ // help, -i/-o for the input and output file names, and three valueless switches
+ // (-f/--show-full-ip, --print-stats, --yt-stats) stored into the matching bool members.
+ DefaultCliParams::DefaultCliParams() {
+     using namespace CliParamsDesc;
+
+     Opts.SetFreeArgsMax(0);
+     Opts.AddHelpOption('h');
+
+     // A file-name option: takes a mandatory argument and defaults to the member's current value.
+     const auto setupFileOption = [](NLastGetopt::TOpt& option, TString* target, const TString& help) {
+         option
+             .RequiredArgument("filename")
+             .DefaultValue(*target)
+             .StoreResult(target).Help(help);
+     };
+
+     // A switch: "0" when absent, "1" when given on the command line.
+     const auto setupSwitch = [](NLastGetopt::TOpt& option, bool* target, const TString& help) {
+         option
+             .Optional()
+             .NoArgument()
+             .DefaultValue("0")
+             .OptionalValue("1")
+             .StoreResult(target).Help(help);
+     };
+
+     setupFileOption(Opts.AddLongOption('i', InputFnameParam), &InputFname, InputFnameParamDesc);
+     setupFileOption(Opts.AddLongOption('o', OutputFnameParam), &OutputFname, OutputFnameParamDesc);
+
+     setupSwitch(Opts.AddLongOption('f', OutputFullIpParam), &OutputFullIp, OutputFullIpParamDesc);
+     setupSwitch(Opts.AddLongOption(PrintStatsParam), &PrintStats, PrintStatsParamDesc);
+     setupSwitch(Opts.AddLongOption(PrintYtStatsParam), &PrintYtStats, PrintYtStatsParamDesc);
+ }
+
+ // Applies the parsed switches that have a global effect: when OutputFullIp is set,
+ // SetIpFullOutFormat() is called; otherwise nothing happens.
+ void DefaultCliParams::ApplyFlags() const {
+     if (!OutputFullIp) {
+         return;
+     }
+     SetIpFullOutFormat();
+ }
+
+ // Parses the command line against the registered options, then applies the flags. The
+ // parse-result object is built only for the parsing it performs; it is not read afterwards.
+ void DefaultCliParams::Parse(int argc, const char **argv) {
+     NLastGetopt::TOptsParseResult parsed(&GetOpts(), argc, argv);
+     ApplyFlags();
+ }
+
+} // NIPREG
diff --git a/library/cpp/ipreg/util_helpers.h b/library/cpp/ipreg/util_helpers.h
new file mode 100644
index 0000000000..eab2dfb320
--- /dev/null
+++ b/library/cpp/ipreg/util_helpers.h
@@ -0,0 +1,65 @@
+#pragma once
+
+#include <library/cpp/getopt/opt.h>
+#include <util/generic/string.h>
+#include <util/generic/maybe.h>
+
+class IInputStream;
+class IOutputStream;
+
+namespace NJson {
+ class TJsonValue;
+}
+
+namespace NIPREG {
+ class TReader;
+
+ // @input any form of range+payload
+ // @output $ip.begin-$ip.end \t {"region_id":$reg,"reliability":$rel}
+ void DoCoarsening(IInputStream& input, IOutputStream& output);
+
+ // Settings for DoMerging().
+ struct MergeTraits {
+ // Passed to IsJsonEquals() when JSON payloads of adjacent ranges are compared.
+ const TVector<TString> ExcludeFieldsList;
+ // Empty selects JSON payload mode; non-empty selects plain-string payload mode.
+ TString ConcatSep;
+ // Run SortJson() on every payload before comparing; also forwarded to MergeJsonsData().
+ bool SortData{};
+ // Forwarded to MergeJsonsData().
+ bool CountMerges{};
+ // Drop a range that lies inside the pending one and carries the same payload.
+ bool JoinNestedRanges{};
+ };
+
+ void DoMerging(TReader& input, IOutputStream& output, const MergeTraits& traits);
+ void DoMerging3(TReader& input, IOutputStream& output, const TString& geodata, bool ByRegsOnly = false, bool silentMode = false);
+ void DoMergeEqualsRange(TReader& input, IOutputStream& output);
+
+ void DoPatching(TReader& base, TReader& patch, IOutputStream& output, bool sortData = false);
+
+ void AddStubRanges(TReader& input, IOutputStream& output);
+
+ void CheckAddressSpaceForCompleteness(IInputStream& input, IOutputStream& output);
+ void CheckRangesForMonotonicSequence(IInputStream& input, IOutputStream& output, bool IsStrict = false);
+
+ NJson::TJsonValue ParseJsonString(const TString& data);
+ TString SortJsonData(const NJson::TJsonValue& json);
+ TString SortJsonData(const TString& json);
+
+ TString AddJsonAttrs(const TVector<TString>& addFieldsList, const TString& jsonStr, const TMaybe<TString>& attrValue);
+ TString ExcludeJsonAttrs(const TVector<TString>& excludeFieldsList, const TString& jsonStr);
+ // Keeps only the keys listed in `extractFieldsList`; see the definition in util_helpers.cpp.
+ TString ExtractJsonAttrs(const TVector<TString>& extractFieldsList, const TString& jsonStr);
+
+ extern const TString STUB_DATA;
+
+ // Option set shared by the IPREG command-line tools. The constructor registers the options
+ // and binds them to the public members below; Parse() fills the members from argv.
+ struct DefaultCliParams {
+ DefaultCliParams();
+
+ // Gives access to the option table, e.g. for registering tool-specific options.
+ NLastGetopt::TOpts& GetOpts() { return Opts; }
+ // Parses argv against Opts and then calls ApplyFlags().
+ void Parse(int argc, const char **argv);
+ // Switches the address output format to full IPv6 when OutputFullIp is set.
+ void ApplyFlags() const;
+
+ TString InputFname = "-"; // -i / --input-data
+ TString OutputFname = ""; // -o / --output-data
+ bool OutputFullIp = false; // -f / --show-full-ip
+ bool PrintStats = false; // --print-stats
+ bool PrintYtStats = false; // --yt-stats
+
+ NLastGetopt::TOpts Opts;
+ };
+} // NIPREG
diff --git a/library/cpp/ipreg/writer.cpp b/library/cpp/ipreg/writer.cpp
new file mode 100644
index 0000000000..89f8c8b629
--- /dev/null
+++ b/library/cpp/ipreg/writer.cpp
@@ -0,0 +1,91 @@
+#include "writer.h"
+
+#include <util/stream/file.h>
+
+namespace NIPREG {
+
+// Writes to the file `fname`, or to Cout when `fname` is empty. The file stream, if any, is
+// owned by the writer. Separators take their defaults and mixed ranges are not split.
+TWriter::TWriter(const TString& fname)
+ : OwnedStreamPtr(fname.empty() ? nullptr : new TFileOutput(fname))
+ // Reads OwnedStreamPtr, so that member must stay declared before Stream in the class.
+ , Stream(OwnedStreamPtr ? *OwnedStreamPtr.Get() : Cout)
+ , AddrSeparator(ADDR_SEP)
+ , DataSeparator(DATA_SEP)
+ , SplitMixed(false)
+{
+}
+
+// Writes to a caller-provided stream (not owned) with explicit format, separators and
+// split mode.
+TWriter::TWriter(IOutputStream& stream, EAddressFormat addressFormat, const TString& addrSep, const TString& dataSep, const bool splitMixed)
+ : Stream(stream)
+ , AddressFormat(addressFormat)
+ , AddrSeparator(addrSep)
+ , DataSeparator(dataSep)
+ , SplitMixed(splitMixed)
+{
+}
+
+// Borders of the IPv4 block, used by TWriter::Write when SplitMixed is on.
+namespace {
+ const TAddress IPv4Start = TAddress::ParseIPv4("0.0.0.0");
+ const TAddress IPv4End = TAddress::ParseIPv4("255.255.255.255");
+
+ // The addresses immediately below and above the IPv4 block.
+ const TAddress IPv6BeforeV4 = IPv4Start.Prev();
+ const TAddress IPv6AfterV4 = IPv4End.Next();
+}
+
+// Writes one range. With SplitMixed on, a range that crosses a border of the IPv4 block
+// (IPv4Start..IPv4End) is first cut at that border, so that no emitted row mixes addresses
+// from inside and outside the block.
+//
+// The recursive calls are qualified on purpose. Write() is virtual and TMergingWriter flushes
+// its stored range through TWriter::Write(); an unqualified call here would dispatch back
+// into TMergingWriter::Write(), which would flush the same stored range again and never
+// terminate.
+void TWriter::Write(const TAddress& first, const TAddress& last, const TString& data, bool printRange) {
+    if (SplitMixed) {
+        // Starts below the block and reaches at least its first address.
+        if (first < IPv4Start && !(last < IPv4Start)) {
+            TWriter::Write(first, IPv6BeforeV4, data, printRange);
+            TWriter::Write(IPv4Start, last, data, printRange);
+            return;
+        }
+
+        // Starts no later than the last address of the block and runs past it.
+        if (!(IPv4End < first) && IPv4End < last) {
+            TWriter::Write(first, IPv4End, data, printRange);
+            TWriter::Write(IPv6AfterV4, last, data, printRange);
+            return;
+        }
+    }
+    WriteImpl(first, last, data, printRange);
+}
+
+// Emits one row: "<first><AddrSeparator><last>" when `printRange` is set, then the payload
+// when it is non-empty (preceded by DataSeparator if the range was printed), then a newline.
+// Nothing at all is written when there is neither a range nor a payload to print.
+void TWriter::WriteImpl(const TAddress& first, const TAddress& last, const TString& data, bool printRange) {
+    const bool hasData = !data.empty();
+    if (!printRange && !hasData) {
+        return;
+    }
+
+    if (printRange) {
+        Stream << first.Format(AddressFormat) << AddrSeparator << last.Format(AddressFormat);
+        if (hasData) {
+            Stream << DataSeparator;
+        }
+    }
+    if (hasData) {
+        Stream << data;
+    }
+    Stream << "\n";
+}
+
+// Nothing is buffered by the plain writer, so there is nothing to flush here; subclasses
+// that hold rows back override this.
+void TWriter::Finalize() {
+}
+
+// Forwards every argument to the matching TWriter constructor; the merge state starts empty.
+TMergingWriter::TMergingWriter(IOutputStream& stream, EAddressFormat addressFormat, const TString& addrSep, const TString& dataSep, const bool splitMixed)
+ : TWriter(stream, addressFormat, addrSep, dataSep, splitMixed) {
+}
+
+// Buffers one range. A range that starts right after the stored one and carries the same
+// payload just extends it; anything else flushes the stored range through TWriter::Write()
+// and becomes the new stored range. The printRange argument is ignored.
+void TMergingWriter::Write(const TAddress& first, const TAddress& last, const TString& data, bool) {
+    const bool extendsStored = Initialized && data == StoredData && first == StoredLast.Next();
+    if (extendsStored) {
+        StoredLast = last;
+        return;
+    }
+
+    if (Initialized) {
+        TWriter::Write(StoredFirst, StoredLast, StoredData);
+    }
+    StoredFirst = first;
+    StoredLast = last;
+    StoredData = data;
+    Initialized = true;
+}
+
+// Flushes the stored range, if any, and returns the writer to its empty state.
+void TMergingWriter::Finalize() {
+    if (!Initialized) {
+        return;
+    }
+    TWriter::Write(StoredFirst, StoredLast, StoredData);
+    Initialized = false;
+}
+
+} // NIPREG
diff --git a/library/cpp/ipreg/writer.h b/library/cpp/ipreg/writer.h
new file mode 100644
index 0000000000..a4232a89a6
--- /dev/null
+++ b/library/cpp/ipreg/writer.h
@@ -0,0 +1,62 @@
+#pragma once
+
+#include "range.h"
+
+#include <util/generic/ptr.h>
+#include <util/generic/string.h>
+#include <util/stream/output.h>
+
+namespace NIPREG {
+
+// Writes IPREG rows ("<first><addr-sep><last><data-sep><payload>\n") to a stream.
+class TWriter {
+public:
+ // Default separators: between the two addresses, and between the range and the payload.
+ static constexpr char const * const ADDR_SEP = "-";
+ static constexpr char const * const DATA_SEP = "\t";
+
+public:
+ // Writes to the named file, or to Cout when the name is empty.
+ TWriter(const TString& filename = "");
+ // Writes to a caller-owned stream.
+ TWriter(IOutputStream& stream, EAddressFormat addressFormat = EAddressFormat::IPV6, const TString& addrSep = ADDR_SEP, const TString& dataSep = DATA_SEP, const bool splitMixed = false);
+ // NOTE(review): `addrSep` is passed for the data separator as well - confirm this is intended.
+ TWriter(IOutputStream& stream, const TString& addrSep, EAddressFormat addressFormat)
+ : TWriter(stream, addressFormat, addrSep, addrSep)
+ {}
+ virtual ~TWriter() {}
+
+ // Convenience overload: unpacks the entry and calls the virtual Write().
+ void Write(const TGenericEntry& entry, bool printRange = true) {
+ Write(entry.First, entry.Last, entry.Data, printRange);
+ }
+ // Writes one range; with SplitMixed on it is first cut at the borders of the IPv4 block.
+ virtual void Write(const TAddress& first, const TAddress& last, const TString& data, bool printRange = true);
+ // Flushes whatever the writer holds back; a no-op in this class.
+ virtual void Finalize();
+
+ // Exposes the underlying stream.
+ operator IOutputStream&() {
+ return Stream;
+ }
+
+private:
+ // Formats and emits a single row without any splitting.
+ void WriteImpl(const TAddress& first, const TAddress& last, const TString& data, bool printRange);
+
+ // Set only by the file-name constructor. Must stay declared before Stream, whose
+ // initialiser reads it.
+ TAutoPtr<IOutputStream> OwnedStreamPtr;
+ IOutputStream& Stream;
+
+ EAddressFormat AddressFormat = EAddressFormat::IPV6;
+ const TString AddrSeparator = ADDR_SEP;
+ const TString DataSeparator = DATA_SEP;
+ const bool SplitMixed;
+};
+
+// A TWriter that holds back the most recent range so that a following adjacent range with
+// the same payload can be glued to it. Finalize() must be called to emit the last stored
+// range; nothing in this class flushes it automatically.
+class TMergingWriter : public TWriter {
+public:
+ TMergingWriter(IOutputStream& stream, EAddressFormat addressFormat = EAddressFormat::IPV6, const TString& addrSep = ADDR_SEP, const TString& dataSep = DATA_SEP, const bool splitMixed = false);
+ // NOTE(review): `addrSep` is passed for the data separator as well - confirm this is intended.
+ TMergingWriter(IOutputStream& stream, const TString& addrSep, EAddressFormat addressFormat)
+ : TWriter(stream, addressFormat, addrSep, addrSep)
+ {}
+ // Buffers or extends the stored range; the printRange argument is ignored.
+ void Write(const TAddress& first, const TAddress& last, const TString& data, bool printRange = true) final override;
+ // Emits the stored range, if any.
+ void Finalize() final;
+
+private:
+ // The range currently held back; meaningful only while Initialized is true.
+ TAddress StoredFirst;
+ TAddress StoredLast;
+ TString StoredData;
+ bool Initialized = false;
+};
+
+} // NIPREG
diff --git a/library/cpp/ipreg/ya.make b/library/cpp/ipreg/ya.make
new file mode 100644
index 0000000000..b03720f761
--- /dev/null
+++ b/library/cpp/ipreg/ya.make
@@ -0,0 +1,26 @@
+LIBRARY()
+
+SRCS(
+ address.cpp
+ checker.cpp
+ merge.cpp
+ range.cpp
+ reader.cpp
+ sources.cpp
+ split.cpp
+ stopwatch.cpp
+ writer.cpp
+ util_helpers.cpp
+)
+
+PEERDIR(
+ library/cpp/getopt/small
+ library/cpp/json
+ library/cpp/geobase
+ library/cpp/int128
+)
+
+GENERATE_ENUM_SERIALIZATION(address.h)
+GENERATE_ENUM_SERIALIZATION(sources.h)
+
+END()
diff --git a/library/cpp/langmask/CMakeLists.txt b/library/cpp/langmask/CMakeLists.txt
new file mode 100644
index 0000000000..499930c4b0
--- /dev/null
+++ b/library/cpp/langmask/CMakeLists.txt
@@ -0,0 +1,9 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(proto)
diff --git a/library/cpp/langmask/proto/CMakeLists.darwin-x86_64.txt b/library/cpp/langmask/proto/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..e9f692d0f2
--- /dev/null
+++ b/library/cpp/langmask/proto/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,43 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(cpp-langmask-proto)
+target_link_libraries(cpp-langmask-proto PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(cpp-langmask-proto PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/langmask/proto/langmask.proto
+)
+target_proto_addincls(cpp-langmask-proto
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(cpp-langmask-proto
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/library/cpp/langmask/proto/CMakeLists.linux-aarch64.txt b/library/cpp/langmask/proto/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..61f975983e
--- /dev/null
+++ b/library/cpp/langmask/proto/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,44 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(cpp-langmask-proto)
+target_link_libraries(cpp-langmask-proto PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(cpp-langmask-proto PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/langmask/proto/langmask.proto
+)
+target_proto_addincls(cpp-langmask-proto
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(cpp-langmask-proto
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/library/cpp/langmask/proto/CMakeLists.linux-x86_64.txt b/library/cpp/langmask/proto/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..61f975983e
--- /dev/null
+++ b/library/cpp/langmask/proto/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,44 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(cpp-langmask-proto)
+target_link_libraries(cpp-langmask-proto PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(cpp-langmask-proto PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/langmask/proto/langmask.proto
+)
+target_proto_addincls(cpp-langmask-proto
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(cpp-langmask-proto
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/library/cpp/langmask/proto/CMakeLists.txt b/library/cpp/langmask/proto/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/library/cpp/langmask/proto/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/library/cpp/langmask/proto/CMakeLists.windows-x86_64.txt b/library/cpp/langmask/proto/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..e9f692d0f2
--- /dev/null
+++ b/library/cpp/langmask/proto/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,43 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(cpp-langmask-proto)
+target_link_libraries(cpp-langmask-proto PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(cpp-langmask-proto PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/langmask/proto/langmask.proto
+)
+target_proto_addincls(cpp-langmask-proto
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(cpp-langmask-proto
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/library/cpp/langmask/proto/langmask.proto b/library/cpp/langmask/proto/langmask.proto
new file mode 100644
index 0000000000..be23ecfbba
--- /dev/null
+++ b/library/cpp/langmask/proto/langmask.proto
@@ -0,0 +1,6 @@
+package NProto;
+
+message TLangMask {
+ repeated uint32 Bits = 1; // binary
+ optional string Names = 2; // human readable
+}
diff --git a/library/cpp/langmask/proto/ya.make b/library/cpp/langmask/proto/ya.make
new file mode 100644
index 0000000000..823a0ad261
--- /dev/null
+++ b/library/cpp/langmask/proto/ya.make
@@ -0,0 +1,11 @@
+PROTO_LIBRARY()
+
+SRCS(
+ langmask.proto
+)
+
+IF (NOT PY_PROTOS_FOR)
+ EXCLUDE_TAGS(GO_PROTO)
+ENDIF()
+
+END()
diff --git a/library/cpp/microbdb/CMakeLists.darwin-x86_64.txt b/library/cpp/microbdb/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..c4d2e9d3a4
--- /dev/null
+++ b/library/cpp/microbdb/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,56 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+find_package(ZLIB REQUIRED)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(library-cpp-microbdb)
+target_link_libraries(library-cpp-microbdb PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-fastlz
+ contrib-libs-libc_compat
+ contrib-libs-protobuf
+ contrib-libs-snappy
+ ZLIB::ZLIB
+ cpp-deprecated-fgood
+ cpp-on_disk-st_hash
+ library-cpp-packedtypes
+)
+target_proto_messages(library-cpp-microbdb PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/noextinfo.proto
+)
+target_sources(library-cpp-microbdb PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/file.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/header.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/microbdb.cpp
+)
+target_proto_addincls(library-cpp-microbdb
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(library-cpp-microbdb
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/library/cpp/microbdb/CMakeLists.linux-aarch64.txt b/library/cpp/microbdb/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..302dbd03cd
--- /dev/null
+++ b/library/cpp/microbdb/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,57 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+find_package(ZLIB REQUIRED)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(library-cpp-microbdb)
+target_link_libraries(library-cpp-microbdb PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-fastlz
+ contrib-libs-libc_compat
+ contrib-libs-protobuf
+ contrib-libs-snappy
+ ZLIB::ZLIB
+ cpp-deprecated-fgood
+ cpp-on_disk-st_hash
+ library-cpp-packedtypes
+)
+target_proto_messages(library-cpp-microbdb PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/noextinfo.proto
+)
+target_sources(library-cpp-microbdb PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/file.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/header.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/microbdb.cpp
+)
+target_proto_addincls(library-cpp-microbdb
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(library-cpp-microbdb
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/library/cpp/microbdb/CMakeLists.linux-x86_64.txt b/library/cpp/microbdb/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..302dbd03cd
--- /dev/null
+++ b/library/cpp/microbdb/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,57 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+find_package(ZLIB REQUIRED)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(library-cpp-microbdb)
+target_link_libraries(library-cpp-microbdb PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-fastlz
+ contrib-libs-libc_compat
+ contrib-libs-protobuf
+ contrib-libs-snappy
+ ZLIB::ZLIB
+ cpp-deprecated-fgood
+ cpp-on_disk-st_hash
+ library-cpp-packedtypes
+)
+target_proto_messages(library-cpp-microbdb PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/noextinfo.proto
+)
+target_sources(library-cpp-microbdb PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/file.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/header.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/microbdb.cpp
+)
+target_proto_addincls(library-cpp-microbdb
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(library-cpp-microbdb
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/library/cpp/microbdb/CMakeLists.txt b/library/cpp/microbdb/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/library/cpp/microbdb/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/library/cpp/microbdb/CMakeLists.windows-x86_64.txt b/library/cpp/microbdb/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..c4d2e9d3a4
--- /dev/null
+++ b/library/cpp/microbdb/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,56 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# zlib is taken from the system via its imported target (ZLIB::ZLIB below).
+find_package(ZLIB REQUIRED)
+# Resolve the in-tree protoc binary; get_built_tool_path is a project helper defined elsewhere.
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+# Resolve the in-tree cpp_styleguide protoc plugin the same way.
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+# The microbdb library target and its (public) link dependencies.
+add_library(library-cpp-microbdb)
+target_link_libraries(library-cpp-microbdb PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-fastlz
+ contrib-libs-libc_compat
+ contrib-libs-protobuf
+ contrib-libs-snappy
+ ZLIB::ZLIB
+ cpp-deprecated-fgood
+ cpp-on_disk-st_hash
+ library-cpp-packedtypes
+)
+# Proto file registered through the project's target_proto_messages helper (defined elsewhere).
+target_proto_messages(library-cpp-microbdb PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/noextinfo.proto
+)
+# C++ sources compiled directly into the library target.
+target_sources(library-cpp-microbdb PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/file.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/header.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/microbdb.cpp
+)
+# Directories handed to the project's target_proto_addincls helper (presumably proto
+# import search paths). The generator lists some entries twice.
+target_proto_addincls(library-cpp-microbdb
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+# Output options handed to the project's target_proto_outs helper; both point at the build tree.
+target_proto_outs(library-cpp-microbdb
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/library/cpp/microbdb/align.h b/library/cpp/microbdb/align.h
new file mode 100644
index 0000000000..2f8567f134
--- /dev/null
+++ b/library/cpp/microbdb/align.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <util/system/defaults.h>
+
+// Alignment unit for records stored in dat pages.
+using TDatAlign = int;
+
+// Offset of the last TDatAlign-sized slot that a record of `size` bytes touches,
+// i.e. (size - 1) rounded down to a multiple of sizeof(TDatAlign).
+// For size == 0 the subtraction wraps around.
+static inline size_t DatFloor(size_t size) {
+    const size_t alignMask = sizeof(TDatAlign) - 1;
+    return (size - 1) & ~alignMask;
+}
+
+// `size` rounded up to a multiple of sizeof(TDatAlign).
+static inline size_t DatCeil(size_t size) {
+    const size_t lastSlot = DatFloor(size);
+    return lastSlot + sizeof(TDatAlign);
+}
+
+// Zeroes the last TDatAlign-sized slot touched by a record of `size` bytes at `ptr`.
+static inline void DatSet(void* ptr, size_t size) {
+    char* const base = (char*)ptr;
+    TDatAlign* const slot = (TDatAlign*)(base + DatFloor(size));
+    *slot = 0;
+}
diff --git a/library/cpp/microbdb/compressed.h b/library/cpp/microbdb/compressed.h
new file mode 100644
index 0000000000..f0c9edfa92
--- /dev/null
+++ b/library/cpp/microbdb/compressed.h
@@ -0,0 +1,520 @@
+#pragma once
+
+#include <util/stream/zlib.h>
+
+#include "microbdb.h"
+#include "safeopen.h"
+
+// Input manipulator that reads a gzip file through a TZLibDecompress stream layered
+// over the raw file input kept in CompressedInput.
+class TCompressedInputFileManip: public TInputFileManip {
+public:
+    // Always reports an unknown length.
+    inline i64 GetLength() const {
+        return -1; // Some microbdb logic relies on unknown size of compressed files
+    }
+
+    // Emulates a seek by reading and discarding data from the decompressed stream.
+    // Supports SEEK_SET and SEEK_CUR only; any other whence returns -1.
+    // Returns the requested position.
+    // NOTE(review): positions come from DoGetPosition(), i.e. the underlying file, and the
+    // backward branch rewinds the file without recreating the decompressor — confirm
+    // that callers only rely on forward skips.
+    inline i64 Seek(i64 offset, int whence) {
+        i64 oldPos = DoGetPosition();
+        i64 newPos = offset;
+        switch (whence) {
+            case SEEK_CUR:
+                newPos += oldPos;
+                [[fallthrough]]; // Keeps the compiler happy. Please fix it!
+            case SEEK_SET:
+                break;
+            default:
+                return -1L;
+        }
+        if (oldPos > newPos) {
+            VerifyRandomAccess();
+            DoSeek(0, SEEK_SET, IsStreamOpen());
+            oldPos = 0;
+        }
+        const size_t bufsize = 1 << 12;
+        char buf[bufsize];
+        for (i64 i = oldPos; i < newPos; i += bufsize) {
+            const size_t want = (i + (i64)bufsize < newPos) ? bufsize : (size_t)(newPos - i);
+            // Load() is used instead of Read() because a single Read() may deliver fewer
+            // bytes than requested, which would leave the stream short of newPos.
+            if (InputStream->Load(buf, want) != want)
+                break; // stream ended early; nothing more to skip
+        }
+        return newPos;
+    }
+
+    // Seeks the underlying compressed file and, on success, starts a fresh decompressor
+    // at the new file position. Returns DoSeek's result (-1 on failure).
+    i64 RealSeek(i64 offset, int whence) {
+        InputStream.Destroy();
+        i64 ret = DoSeek(offset, whence, !!CompressedInput);
+        if (ret != -1)
+            DoStreamOpen(DoCreateStream(), true);
+        return ret;
+    }
+
+protected:
+    // Wraps the file in an unbuffered input and returns a decompressor reading from it.
+    IInputStream* CreateStream(const TFile& file) override {
+        CompressedInput.Reset(new TUnbufferedFileInput(file));
+        return DoCreateStream();
+    }
+    // Creates a gzip decompressor over the current CompressedInput.
+    inline IInputStream* DoCreateStream() {
+        return new TZLibDecompress(CompressedInput.Get(), ZLib::GZip);
+        //return new TLzqDecompress(CompressedInput.Get());
+    }
+    THolder<IInputStream> CompressedInput; // raw (still compressed) file input
+};
+
+// Variant of TCompressedInputFileManip whose compressed source is a TFileInput built
+// with a second argument of 0x100000 (presumably the buffer size) instead of a
+// TUnbufferedFileInput.
+class TCompressedBufferedInputFileManip: public TCompressedInputFileManip {
+protected:
+ IInputStream* CreateStream(const TFile& file) override {
+ CompressedInput.Reset(new TFileInput(file, 0x100000));
+ return DoCreateStream();
+ }
+};
+
+// Page-file types instantiated over the compressed input manipulators.
+using TCompressedInputPageFile = TInputPageFileImpl<TCompressedInputFileManip>;
+using TCompressedBufferedInputPageFile = TInputPageFileImpl<TCompressedBufferedInputFileManip>;
+
+// Index record pairing a key with a 64-bit offset.
+template <class TVal>
+struct TGzKey {
+ ui64 Offset;
+ TVal Key;
+
+ // Record signature: the key type's signature plus a fixed constant.
+ static const ui32 RecordSig = TVal::RecordSig + 0x50495a47;
+
+ // Offset is not initialized by this constructor.
+ TGzKey() {
+ }
+
+ TGzKey(ui64 offset, const TVal& key)
+ : Offset(offset)
+ , Key(key)
+ {
+ }
+
+ // Size of this record: sizeof(Offset) plus ::SizeOf of the key. On the null-object
+ // path it returns 0 when ::SizeOf of a null key is 0, otherwise the same sum.
+ // NOTE(review): the `if (this)` test implies the method may be invoked through a null
+ // pointer, which is undefined behavior in standard C++ and may be optimized away —
+ // confirm how ::SizeOf dispatches here.
+ size_t SizeOf() const {
+ if (this)
+ return sizeof(Offset) + ::SizeOf(&Key);
+ else {
+ size_t sizeOfKey = ::SizeOf((TVal*)NULL);
+ return sizeOfKey ? (sizeof(Offset) + sizeOfKey) : 0;
+ }
+ }
+};
+
+// Reader for an index file made of TGzKey<TVal> records. On Open() the first page
+// returned by the base file is copied into Index0 and then used as a top-level index
+// for the lookups below.
+template <class TVal>
+class TInZIndexFile: protected TInDatFileImpl<TGzKey<TVal>> {
+ typedef TInDatFileImpl<TGzKey<TVal>> TDatFile;
+ typedef TGzKey<TVal> TGzVal;
+ typedef typename TDatFile::TRecIter TRecIter;
+ typedef typename TRecIter::TPageIter TPageIter;
+
+public:
+ // Note: RecsOnPage is not initialized here; Open() sets it only for record types
+ // whose SizeOf of a null record is nonzero.
+ TInZIndexFile()
+ : Index0(nullptr)
+ {
+ }
+
+ // Opens the file, allocates Index0 (one page) and fills it from the first page read.
+ // Returns the base Open() error, MBDB_NO_MEMORY if the allocation fails, else 0.
+ // NOTE(review): the result of TDatFile::Next() / TPageIter::Current() is not checked
+ // before the memcpy, and a previously allocated Index0 is not freed — confirm callers
+ // never open an empty file or call Open() twice without Close().
+ int Open(const char* fname, size_t pages = 1, int pagesOrBytes = 1, ui32* gotRecordSig = nullptr) {
+ int ret = TDatFile::Open(fname, pages, pagesOrBytes, gotRecordSig);
+ if (ret)
+ return ret;
+ if (!(Index0 = (TDatPage*)malloc(TPageIter::GetPageSize()))) {
+ TDatFile::Close();
+ return MBDB_NO_MEMORY;
+ }
+ if (SizeOf((TGzVal*)NULL))
+ RecsOnPage = (TPageIter::GetPageSize() - sizeof(TDatPage)) / DatCeil(SizeOf((TGzVal*)NULL));
+ TDatFile::Next();
+ memcpy(Index0, TPageIter::Current(), TPageIter::GetPageSize());
+ return 0;
+ }
+
+ // Releases Index0 and closes the base file.
+ int Close() {
+ free(Index0);
+ Index0 = NULL;
+ return TDatFile::Close();
+ }
+
+ inline int GetError() const {
+ return TDatFile::GetError();
+ }
+
+ // Two-level lookup: locate the key on Index0, go to page (result + 1), locate it
+ // there, and return the in-page index plus the offset found on Index0. When SizeOf
+ // of a null TVal is 0, delegates to FindVszKey().
+ // NOTE(review): the page returned by GotoPage() is not null-checked here.
+ int FindKey(const TVal* akey, const typename TExtInfoType<TVal>::TResult* = NULL) {
+ assert(IsOpen());
+ if (!SizeOf((TVal*)NULL))
+ return FindVszKey(akey);
+ int pageno;
+ i64 offset;
+ FindKeyOnPage(pageno, offset, Index0, akey);
+ TDatPage* page = TPageIter::GotoPage(pageno + 1);
+ int num_add = (int)offset;
+ FindKeyOnPage(pageno, offset, page, akey);
+ return pageno + num_add;
+ }
+
+ using TDatFile::IsOpen;
+
+ // Same two-level lookup as FindKey(), using the linear scan in FindVszKeyOnPage().
+ int FindVszKey(const TVal* akey, const typename TExtInfoType<TVal>::TResult* = NULL) {
+ int pageno;
+ i64 offset;
+ FindVszKeyOnPage(pageno, offset, Index0, akey);
+ TDatPage* page = TPageIter::GotoPage(pageno + 1);
+ int num_add = (int)offset;
+ FindVszKeyOnPage(pageno, offset, page, akey);
+ return pageno + num_add;
+ }
+
+ // Returns the Offset stored in index record number `pageno`, or -1 when that record
+ // lies beyond the end of the index. When SizeOf of a null TVal is 0, delegates to
+ // FindVszPage().
+ i64 FindPage(int pageno) {
+ if (!SizeOf((TVal*)NULL))
+ return FindVszPage(pageno);
+ int recsize = DatCeil(SizeOf((TGzVal*)NULL));
+ TDatPage* page = TPageIter::GotoPage(1 + pageno / RecsOnPage);
+ if (!page) // can happen if pageno is beyond EOF
+ return -1;
+ unsigned int localpageno = pageno % RecsOnPage;
+ if (localpageno >= page->RecNum) // can happen if pageno is beyond EOF
+ return -1;
+ TGzVal* v = (TGzVal*)((char*)page + sizeof(TDatPage) + localpageno * recsize);
+ return v->Offset;
+ }
+
+ // FindPage() counterpart that walks records one by one, stepping by
+ // DatCeil(SizeOf(record)). Returns -1 when the scan reaches the end of the selected
+ // page without finding the record.
+ // NOTE(review): the page returned by GotoPage(n) is dereferenced without a null check.
+ i64 FindVszPage(int pageno) {
+ TGzVal* cur = (TGzVal*)((char*)Index0 + sizeof(TDatPage));
+ TGzVal* prev = cur;
+ unsigned int n = 0;
+ while (n < Index0->RecNum && cur->Offset <= (unsigned int)pageno) {
+ prev = cur;
+ cur = (TGzVal*)((char*)cur + DatCeil(SizeOf(cur)));
+ n++;
+ }
+ TDatPage* page = TPageIter::GotoPage(n);
+ unsigned int num_add = (unsigned int)(prev->Offset);
+ n = 0;
+ cur = (TGzVal*)((char*)page + sizeof(TDatPage));
+ while (n < page->RecNum && n + num_add < (unsigned int)pageno) {
+ cur = (TGzVal*)((char*)cur + DatCeil(SizeOf(cur)));
+ n++;
+ }
+ if (n == page->RecNum) // can happen if pageno is beyond EOF
+ return -1;
+ return cur->Offset;
+ }
+
+protected:
+ // Binary search over the fixed-size records of `page`. Outputs the selected record
+ // index in `pageno` and that record's Offset in `offset`.
+ void FindKeyOnPage(int& pageno, i64& offset, TDatPage* page, const TVal* Key) {
+ int left = 0;
+ int right = page->RecNum - 1;
+ int recsize = DatCeil(SizeOf((TGzVal*)NULL));
+ while (left < right) {
+ int middle = (left + right) >> 1;
+ if (((TGzVal*)((char*)page + sizeof(TDatPage) + middle * recsize))->Key < *Key)
+ left = middle + 1;
+ else
+ right = middle;
+ }
+ //borders check (left and right)
+ pageno = (left == 0 || ((TGzVal*)((char*)page + sizeof(TDatPage) + left * recsize))->Key < *Key) ? left : left - 1;
+ offset = ((TGzVal*)((char*)page + sizeof(TDatPage) + pageno * recsize))->Offset;
+ }
+
+ // Linear scan over the records of `page` while their keys are less than *key.
+ // The local named RecordSig is just a countdown of the records left to visit.
+ // Outputs the index of the last record visited (clamped to 0) and its Offset.
+ void FindVszKeyOnPage(int& pageno, i64& offset, TDatPage* page, const TVal* key) {
+ TGzVal* cur = (TGzVal*)((char*)page + sizeof(TDatPage));
+ ui32 RecordSig = page->RecNum;
+ i64 tmpoffset = cur->Offset;
+ for (; RecordSig > 0 && cur->Key < *key; --RecordSig) {
+ tmpoffset = cur->Offset;
+ cur = (TGzVal*)((char*)cur + DatCeil(SizeOf(cur)));
+ }
+ int idx = page->RecNum - RecordSig - 1;
+ pageno = (idx >= 0) ? idx : 0;
+ offset = tmpoffset;
+ }
+
+ TDatPage* Index0; // malloc'ed copy of the first page read in Open()
+ int RecsOnPage; // fixed-size records per page; set in Open() only for fixed-size records
+};
+
+// Compressed input page file with a companion key index (KeyFile) that GotoPage()
+// consults to obtain the offset to seek to.
+template <class TKey>
+class TCompressedIndexedInputPageFile: public TCompressedInputPageFile {
+public:
+ int GotoPage(int pageno);
+
+protected:
+ TInZIndexFile<TKey> KeyFile;
+};
+
+// Direct-access dat file whose pages are read through TCompressedIndexedInputPageFile.
+template <class TVal, class TKey>
+class TDirectCompressedInDatFile: public TDirectInDatFile<TVal, TKey,
+ TInDatFileImpl<TVal, TInputRecordIterator<TVal,
+ TInputPageIterator<TCompressedIndexedInputPageFile<TKey>>>>> {
+};
+
+// Output manipulator that writes a gzip file through a TZLibCompress stream layered
+// over the raw file output kept in CompressedOutput.
+class TCompressedOutputFileManip: public TOutputFileManip {
+public:
+    // Always reports an unknown length.
+    inline i64 GetLength() const {
+        return -1; // Some microbdb logic relies on unknown size of compressed files
+    }
+
+    // Emulates a forward seek by writing zero bytes into the compressed stream.
+    // Accepts SEEK_SET and SEEK_CUR; any other whence, or a target before the current
+    // position, yields -1. Returns the target position otherwise.
+    inline i64 Seek(i64 offset, int whence) {
+        const i64 current = DoGetPosition();
+        i64 target = offset;
+        if (whence == SEEK_CUR) {
+            target += current;
+        } else if (whence != SEEK_SET) {
+            return -1L;
+        }
+        if (target < current)
+            return -1L;
+
+        const size_t chunk = 1 << 12;
+        char zeros[chunk] = {0};
+        i64 pos = current;
+        while (pos < target) {
+            // Full chunks until the remainder is no larger than one chunk.
+            const size_t count = (pos + (i64)chunk < target) ? chunk : (size_t)(target - pos);
+            OutputStream->Write(zeros, count);
+            pos += chunk;
+        }
+        return target;
+    }
+
+    // Drops the current compressor, seeks the underlying file and, unless the seek
+    // returned -1, starts a fresh compressor. Returns the seek result.
+    i64 RealSeek(i64 offset, int whence) {
+        OutputStream.Destroy();
+        const i64 pos = DoSeek(offset, whence, !!CompressedOutput);
+        if (pos == -1)
+            return pos;
+        DoStreamOpen(DoCreateStream(), true);
+        return pos;
+    }
+
+protected:
+    // Wraps the file in an unbuffered output and returns a compressor writing to it.
+    IOutputStream* CreateStream(const TFile& file) override {
+        CompressedOutput.Reset(new TUnbufferedFileOutput(file));
+        return DoCreateStream();
+    }
+    // Creates a gzip compressor (third argument 1) over the current CompressedOutput.
+    inline IOutputStream* DoCreateStream() {
+        return new TZLibCompress(CompressedOutput.Get(), ZLib::GZip, 1);
+    }
+    THolder<IOutputStream> CompressedOutput; // raw (compressed) file output
+};
+
+// Variant of TCompressedOutputFileManip whose compressor is created with an extra
+// 0x100000 argument (presumably the buffer size).
+// NOTE(review): DoCreateStream() is not virtual, so this definition only hides the base
+// one; the inherited RealSeek() still calls the base version — confirm that is intended.
+class TCompressedBufferedOutputFileManip: public TCompressedOutputFileManip {
+protected:
+ IOutputStream* CreateStream(const TFile& file) override {
+ CompressedOutput.Reset(new TUnbufferedFileOutput(file));
+ return DoCreateStream();
+ }
+ inline IOutputStream* DoCreateStream() {
+ return new TZLibCompress(CompressedOutput.Get(), ZLib::GZip, 1, 0x100000);
+ }
+};
+
+// Page-file types instantiated over the compressed output manipulators.
+using TCompressedOutputPageFile = TOutputPageFileImpl<TCompressedOutputFileManip>;
+using TCompressedBufferedOutputPageFile = TOutputPageFileImpl<TCompressedBufferedOutputFileManip>;
+
+// Writer for an index file of TGzKey<TVal> records. Records are written starting at
+// page 1; for every page reported through the indexer callback one summary record
+// is collected in Index0, and Close() writes those summaries to page 0.
+template <class TVal>
+class TOutZIndexFile: public TOutDatFileImpl<
+ TGzKey<TVal>,
+ TOutputRecordIterator<TGzKey<TVal>, TOutputPageIterator<TOutputPageFile>, TCallbackIndexer>> {
+ typedef TOutDatFileImpl<
+ TGzKey<TVal>,
+ TOutputRecordIterator<TGzKey<TVal>, TOutputPageIterator<TOutputPageFile>, TCallbackIndexer>>
+ TDatFile;
+ typedef TOutZIndexFile<TVal> TMyType;
+ typedef TGzKey<TVal> TGzVal;
+ typedef typename TDatFile::TRecIter TRecIter;
+ typedef typename TRecIter::TPageIter TPageIter;
+ typedef typename TRecIter::TIndexer TIndexer;
+
+public:
+ // Registers DispatchCallback with the indexer so NextPage() is invoked per page.
+ TOutZIndexFile() {
+ TotalRecNum = 0;
+ TIndexer::SetCallback(this, DispatchCallback);
+ }
+
+ // Opens the file and positions the writer on page 1. Closes the file again and
+ // returns the error if positioning fails.
+ int Open(const char* fname, size_t pagesize, size_t pages, int pagesOrBytes = 1) {
+ int ret = TDatFile::Open(fname, pagesize, pages, pagesOrBytes);
+ if (ret)
+ return ret;
+ if ((ret = TRecIter::GotoPage(1)))
+ TDatFile::Close();
+ return ret;
+ }
+
+ // Records a summary for the current page if it holds records, then writes all
+ // collected summaries to page 0. Reports MBDB_PAGE_OVERFLOW if writing them moved
+ // past page 0. Returns the first error encountered, else the base Close() result.
+ int Close() {
+ TPageIter::Unfreeze();
+ if (TRecIter::RecNum)
+ NextPage(TPageIter::Current());
+ int ret = 0;
+ if (Index0.size() && !(ret = TRecIter::GotoPage(0))) {
+ typename std::vector<TGzVal>::iterator it, end = Index0.end();
+ for (it = Index0.begin(); it != end; ++it)
+ TRecIter::Push(&*it);
+ ret = (TPageIter::GetPageNum() != 0) ? MBDB_PAGE_OVERFLOW : TPageIter::GetError();
+ }
+ Index0.clear();
+ int ret1 = TDatFile::Close();
+ return ret ? ret : ret1;
+ }
+
+protected:
+ int TotalRecNum; // should be enough because we have GotoPage(int)
+ std::vector<TGzVal> Index0; // one summary record per page seen so far
+
+ // Appends a summary for `page`: the running record count before this page and the
+ // key of the page's first record; then adds the page's record count to the total.
+ void NextPage(const TDatPage* page) {
+ TGzVal* rec = (TGzVal*)((char*)page + sizeof(TDatPage));
+ Index0.push_back(TGzVal(TotalRecNum, rec->Key));
+ TotalRecNum += TRecIter::RecNum;
+ }
+
+ // Static trampoline handed to the indexer; forwards to NextPage().
+ static void DispatchCallback(void* This, const TDatPage* page) {
+ ((TMyType*)This)->NextPage(page);
+ }
+};
+
+// Writer for a compressed dat file plus a companion key index file (KeyFile). For each
+// page reported through the indexer callback, one TGzKey<TKey> record holding Tell()
+// and the page's first record (converted to TKey) is appended to the index.
+template <class TVal, class TKey, class TPageFile = TCompressedOutputPageFile>
+class TOutDirectCompressedFileImpl: public TOutDatFileImpl<
+ TVal,
+ TOutputRecordIterator<TVal, TOutputPageIterator<TPageFile>, TCallbackIndexer>> {
+ typedef TOutDatFileImpl<
+ TVal,
+ TOutputRecordIterator<TVal, TOutputPageIterator<TPageFile>, TCallbackIndexer>>
+ TDatFile;
+ typedef TOutDirectCompressedFileImpl<TVal, TKey, TPageFile> TMyType;
+ typedef typename TDatFile::TRecIter TRecIter;
+ typedef typename TRecIter::TPageIter TPageIter;
+ typedef typename TRecIter::TIndexer TIndexer;
+ typedef TGzKey<TKey> TMyKey;
+ typedef TOutZIndexFile<TKey> TKeyFile;
+
+protected:
+ using TDatFile::Tell;
+
+public:
+ // Registers DispatchCallback with the indexer so NextPage() is invoked per page.
+ TOutDirectCompressedFileImpl() {
+ TIndexer::SetCallback(this, DispatchCallback);
+ }
+
+ // Opens the data file, derives the index file name with DatNameToIdx() and opens the
+ // index with page size `ipagesize` (defaults to `pagesize` when 0). On any failure
+ // the data file is closed and the first error is returned.
+ int Open(const char* fname, size_t pagesize, size_t ipagesize = 0) {
+ char iname[FILENAME_MAX];
+ int ret;
+ if (ipagesize == 0)
+ ipagesize = pagesize;
+
+ ret = TDatFile::Open(fname, pagesize, 1, 1);
+ ret = ret ? ret : DatNameToIdx(iname, fname);
+ ret = ret ? ret : KeyFile.Open(iname, ipagesize, 1, 1);
+ if (ret)
+ TDatFile::Close();
+ return ret;
+ }
+
+ // Indexes the current page if it holds records, then closes the index and the data
+ // file. The data file's error takes precedence in the return value.
+ int Close() {
+ if (TRecIter::RecNum)
+ NextPage(TPageIter::Current());
+ int ret = KeyFile.Close();
+ int ret1 = TDatFile::Close();
+ return ret1 ? ret1 : ret;
+ }
+
+ // Data file error if any, otherwise the index file error.
+ int GetError() const {
+ return TDatFile::GetError() ? TDatFile::GetError() : KeyFile.GetError();
+ }
+
+protected:
+ TKeyFile KeyFile;
+
+ // Reserves one index record (MaxSizeOf<TMyKey>() bytes when the record type has no
+ // fixed size) and fills it from the current Tell() and the first record of `page`.
+ // Nothing is written when Reserve() returns null.
+ void NextPage(const TDatPage* page) {
+ size_t sz = SizeOf((TMyKey*)NULL);
+ TMyKey* rec = KeyFile.Reserve(sz ? sz : MaxSizeOf<TMyKey>());
+ if (rec) {
+ rec->Offset = Tell();
+ rec->Key = *(TVal*)((char*)page + sizeof(TDatPage));
+ KeyFile.ResetDat();
+ }
+ }
+
+ // Static trampoline handed to the indexer; forwards to NextPage().
+ static void DispatchCallback(void* This, const TDatPage* page) {
+ ((TMyType*)This)->NextPage(page);
+ }
+};
+
+// Positions the compressed file at the start of page `pageno` using the key index.
+// Returns the sticky Error unchanged if one is already set; otherwise clears Eof,
+// looks up the page offset and seeks to it. Any lookup or seek failure sets and
+// returns MBDB_BAD_FILE_SIZE; success returns 0.
+template <class TKey>
+int TCompressedIndexedInputPageFile<TKey>::GotoPage(int pageno) {
+    if (Error)
+        return Error;
+
+    Eof = 0;
+
+    const i64 offset = KeyFile.FindPage(pageno);
+    // FindPage() reports a page beyond EOF as -1. That value must be rejected here:
+    // a failed RealSeek() also yields -1, so the comparison below would otherwise
+    // treat the failure as a successful seek.
+    if (offset <= 0)
+        return Error = MBDB_BAD_FILE_SIZE;
+
+    if (offset != FileManip.RealSeek(offset, SEEK_SET))
+        Error = MBDB_BAD_FILE_SIZE;
+
+    return Error;
+}
+
+// TInDatFile over the compressed input page file; only forwards its constructor arguments.
+template <typename TVal>
+class TCompressedInDatFile: public TInDatFile<TVal, TCompressedInputPageFile> {
+public:
+ TCompressedInDatFile(const char* name, size_t pages, int pagesOrBytes = 1)
+ : TInDatFile<TVal, TCompressedInputPageFile>(name, pages, pagesOrBytes)
+ {
+ }
+};
+
+// TOutDatFile over the compressed output page file (with TFakeCompression as the
+// second template argument); only forwards its constructor arguments.
+template <typename TVal>
+class TCompressedOutDatFile: public TOutDatFile<TVal, TFakeCompression, TCompressedOutputPageFile> {
+public:
+ TCompressedOutDatFile(const char* name, size_t pagesize, size_t pages, int pagesOrBytes = 1)
+ : TOutDatFile<TVal, TFakeCompression, TCompressedOutputPageFile>(name, pagesize, pages, pagesOrBytes)
+ {
+ }
+};
+
+// Throwing wrapper around TOutDirectCompressedFileImpl that remembers the file name
+// and page sizes and converts error codes into yexception.
+// NOTE(review): Name is a raw strdup'ed pointer and no copy operations are declared
+// here, so copying an instance would lead to a double free — confirm the base class
+// prevents copying.
+template <typename TVal, typename TKey, typename TPageFile = TCompressedOutputPageFile>
+class TOutDirectCompressedFile: protected TOutDirectCompressedFileImpl<TVal, TKey, TPageFile> {
+ typedef TOutDirectCompressedFileImpl<TVal, TKey, TPageFile> TBase;
+
+public:
+ // Stores a copy of the name and the page sizes; does not open anything.
+ TOutDirectCompressedFile(const char* name, size_t pagesize, size_t ipagesize = 0)
+ : Name(strdup(name))
+ , PageSize(pagesize)
+ , IdxPageSize(ipagesize)
+ {
+ }
+
+ // Closes the file and releases the stored name.
+ // NOTE(review): Close() can throw when no exception is in flight; a throw from a
+ // destructor normally terminates the program — confirm this is acceptable.
+ ~TOutDirectCompressedFile() {
+ Close();
+ free(Name);
+ Name = NULL;
+ }
+
+ // Opens `fname` with the stored page sizes; throws yexception on failure, otherwise
+ // replaces the stored name with a copy of `fname`.
+ void Open(const char* fname) {
+ int ret = TBase::Open(fname, PageSize, IdxPageSize);
+ if (ret)
+ ythrow yexception() << ErrorMessage(ret, "Failed to open output file", fname);
+ free(Name);
+ Name = strdup(fname);
+ }
+
+ // Throws for a pending error or a failing close, but only when no exception is
+ // currently propagating. If a pending error throws, TBase::Close() is not reached.
+ // std::uncaught_exception() is deprecated since C++17.
+ void Close() {
+ int ret;
+ if ((ret = TBase::GetError()))
+ if (!std::uncaught_exception())
+ ythrow yexception() << ErrorMessage(ret, "Error before closing output file", Name);
+ if ((ret = TBase::Close()))
+ if (!std::uncaught_exception())
+ ythrow yexception() << ErrorMessage(ret, "Error while closing output file", Name);
+ }
+
+ const char* GetName() const {
+ return Name;
+ }
+
+ using TBase::Freeze;
+ using TBase::Push;
+ using TBase::Reserve;
+ using TBase::Unfreeze;
+
+protected:
+ char* Name; // strdup'ed; freed in the destructor and on re-Open
+ size_t PageSize, IdxPageSize;
+};
+
+// Type bundle naming the buffered compressed page files for output and input.
+class TCompressedInterFileTypes {
+public:
+ typedef TCompressedBufferedOutputPageFile TOutPageFile;
+ typedef TCompressedBufferedInputPageFile TInPageFile;
+};
diff --git a/library/cpp/microbdb/extinfo.h b/library/cpp/microbdb/extinfo.h
new file mode 100644
index 0000000000..c8389e783c
--- /dev/null
+++ b/library/cpp/microbdb/extinfo.h
@@ -0,0 +1,127 @@
+#pragma once
+
+#include "header.h"
+
+#include <library/cpp/packedtypes/longs.h>
+
+#include <util/generic/typetraits.h>
+
+#include <library/cpp/microbdb/noextinfo.pb.h>
+
+// Ordering for TNoExtInfo: no value is ever less than another.
+inline bool operator<(const TNoExtInfo&, const TNoExtInfo&) {
+ return false;
+}
+
+namespace NMicroBDB {
+ // Member-detection helper; presumably generates the THasTExtInfo<T> trait used below.
+ Y_HAS_MEMBER(TExtInfo);
+
+ // Selects the ext-info type for T: TNoExtInfo when the flag is false,
+ // T::TExtInfo when it is true.
+ template <class, bool>
+ struct TSelectExtInfo;
+
+ template <class T>
+ struct TSelectExtInfo<T, false> {
+ typedef TNoExtInfo TExtInfo;
+ };
+
+ template <class T>
+ struct TSelectExtInfo<T, true> {
+ typedef typename T::TExtInfo TExtInfo;
+ };
+
+ // Exists: value of THasTExtInfo<T>. TResult: the ext-info type selected by Exists.
+ template <class T>
+ class TExtInfoType {
+ public:
+ static const bool Exists = THasTExtInfo<T>::value;
+ typedef typename TSelectExtInfo<T, Exists>::TExtInfo TResult;
+ };
+
+ // Member-detection helper; presumably generates the THasMakeExtKey<T> trait used by TMakeExtKey.
+ Y_HAS_MEMBER(MakeExtKey);
+
+ // Builds a key from a value: plain assignment when the flag is false (ext-info
+ // arguments are ignored), TVal::MakeExtKey when it is true.
+ template <class, class, bool>
+ struct TSelectMakeExtKey;
+
+ template <class TVal, class TKey>
+ struct TSelectMakeExtKey<TVal, TKey, false> {
+ static inline void Make(TKey* to, typename TExtInfoType<TKey>::TResult*, const TVal* from, const typename TExtInfoType<TVal>::TResult*) {
+ *to = *from;
+ }
+ };
+
+ template <class TVal, class TKey>
+ struct TSelectMakeExtKey<TVal, TKey, true> {
+ static inline void Make(TKey* to, typename TExtInfoType<TKey>::TResult* toExt, const TVal* from, const typename TExtInfoType<TVal>::TResult* fromExt) {
+ TVal::MakeExtKey(to, toExt, from, fromExt);
+ }
+ };
+
+    // Returns SizeOf(rec). For record types with ext info it also decodes the length
+    // prefix stored right after the record via in_long(): *extLenSize receives the
+    // value in_long() returns and *extSize the decoded length. For types without ext
+    // info both outputs are set to 0. Either output pointer may be null.
+    template <typename T>
+    inline size_t SizeOfExt(const T* rec, size_t* /*out*/ extLenSize = nullptr, size_t* /*out*/ extSize = nullptr) {
+        size_t lenFieldSize = 0;
+        size_t payloadSize = 0;
+        const size_t recSize = SizeOf(rec);
+        if (TExtInfoType<T>::Exists) {
+            i64 decodedLen;
+            const int prefixBytes = in_long(decodedLen, (const char*)rec + recSize);
+            lenFieldSize = static_cast<size_t>(prefixBytes);
+            payloadSize = static_cast<size_t>(decodedLen);
+        }
+        if (extLenSize)
+            *extLenSize = lenFieldSize;
+        if (extSize)
+            *extSize = payloadSize;
+        return recSize;
+    }
+
+ // Parses the ext info stored after `rec` into *extInfo. Aborts (Y_VERIFY) for record
+ // types without ext info. Returns false for a null record, otherwise the result of
+ // ParseFromArray on the bytes that follow the record and its length prefix.
+ template <class T>
+ bool GetExtInfo(const T* rec, typename TExtInfoType<T>::TResult* extInfo) {
+ Y_VERIFY(TExtInfoType<T>::Exists, "GetExtInfo should only be used with extended records");
+ if (!rec)
+ return false;
+ size_t els;
+ size_t es;
+ size_t s = SizeOfExt(rec, &els, &es);
+ const ui8* raw = (const ui8*)rec + s + els;
+ return extInfo->ParseFromArray(raw, es);
+ }
+
+ // Returns a pointer to the raw bytes that follow `rec` (length prefix included) and
+ // stores their total size (prefix + payload) in *len. For a null record returns
+ // nullptr with *len = 0. Aborts (Y_VERIFY) for record types without ext info.
+ template <class T>
+ const ui8* GetExtInfoRaw(const T* rec, size_t* len) {
+ Y_VERIFY(TExtInfoType<T>::Exists, "GetExtInfo should only be used with extended records");
+ if (!rec) {
+ *len = 0;
+ return nullptr;
+ }
+ size_t els;
+ size_t es;
+ size_t s = SizeOfExt(rec, &els, &es);
+ *len = els + es;
+ return (const ui8*)rec + s;
+ }
+
+    // Compares serialized extInfo (e.g. for stable sort).
+    // Records with different payload sizes are ordered by that size difference;
+    // equal-sized payloads are ordered by memcmp of the payload bytes. Only the sign
+    // of the result is meaningful. Aborts (Y_VERIFY) for types without ext info.
+    template <class T>
+    int CompareExtInfo(const T* a, const T* b) {
+        Y_VERIFY(TExtInfoType<T>::Exists, "CompareExtInfo should only be used with extended records");
+        size_t elsA, esA;
+        size_t elsB, esB;
+        // Each record is measured on its own: record size, length-prefix size, payload size.
+        const size_t sizeA = SizeOfExt(a, &elsA, &esA);
+        const size_t sizeB = SizeOfExt(b, &elsB, &esB);
+        if (esA != esB)
+            return static_cast<int>(esA - esB);
+        // The payload starts after the record itself and its length prefix,
+        // at the same position GetExtInfo() parses from.
+        return memcmp((const ui8*)a + sizeA + elsA, (const ui8*)b + sizeB + elsB, esA);
+    }
+
+}
+
+using NMicroBDB::TExtInfoType;
+
+// Front end for key construction: Exists is the value of THasMakeExtKey<TVal>, and
+// Make() forwards to the TSelectMakeExtKey specialization chosen by it.
+template <class TVal, class TKey>
+struct TMakeExtKey {
+ static const bool Exists = NMicroBDB::THasMakeExtKey<TVal>::value;
+ static inline void Make(TKey* to, typename TExtInfoType<TKey>::TResult* toExt, const TVal* from, const typename TExtInfoType<TVal>::TResult* fromExt) {
+ NMicroBDB::TSelectMakeExtKey<TVal, TKey, Exists>::Make(to, toExt, from, fromExt);
+ }
+};
diff --git a/library/cpp/microbdb/file.cpp b/library/cpp/microbdb/file.cpp
new file mode 100644
index 0000000000..599a7301a0
--- /dev/null
+++ b/library/cpp/microbdb/file.cpp
@@ -0,0 +1,220 @@
+#include "file.h"
+
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/stat.h>
+
+#ifdef _win32_
+// Fallback for platforms without S_ISREG. The argument and the whole expansion are
+// parenthesized so the macro stays correct inside larger expressions.
+#define S_ISREG(x) (!!((x) & S_IFREG))
+#endif
+
+// Starts in file-based mode.
+TFileManipBase::TFileManipBase()
+ : FileBased(true)
+{
+}
+
+// Seeks the underlying file. Returns -1 when isStreamOpen is false; otherwise requires
+// file-based mode (VerifyRandomAccess aborts if not) and returns File.Seek's result.
+i64 TFileManipBase::DoSeek(i64 offset, int whence, bool isStreamOpen) {
+ if (!isStreamOpen)
+ return -1;
+ VerifyRandomAccess();
+ return File.Seek(offset, (SeekDir)whence);
+}
+
+// Adopts `file`, recomputes the file-based flag from it, and returns 0 if the file
+// is open or MBDB_OPEN_ERROR otherwise.
+int TFileManipBase::DoFileOpen(const TFile& file) {
+ File = file;
+ SetFileBased(IsFileBased());
+ return (File.IsOpen()) ? 0 : MBDB_OPEN_ERROR;
+}
+
+// Closes the file if it is open.
+// NOTE(review): returns MBDB_ALREADY_INITIALIZED after closing an open file and 0 when
+// nothing was open; callers treat a nonzero result as failure — confirm this is intended.
+int TFileManipBase::DoFileClose() {
+ if (File.IsOpen()) {
+ File.Close();
+ return MBDB_ALREADY_INITIALIZED;
+ }
+ return 0;
+}
+
+// Reports whether the current File is a regular file. On _win_ builds the answer is
+// always true; on _unix_ builds it is true only when the handle is valid, fstat()
+// succeeds and the mode is a regular file. Other platforms fail to compile.
+int TFileManipBase::IsFileBased() const {
+#if defined(_win_)
+    return true;
+#elif defined(_unix_)
+    const FHANDLE handle = File.GetHandle();
+    struct stat info;
+    if (handle == INVALID_FHANDLE)
+        return false;
+    if (::fstat(handle, &info))
+        return false;
+    return S_ISREG(info.st_mode) ? true : false;
+#else
+#error
+#endif
+}
+
+// Starts with no stream attached.
+TInputFileManip::TInputFileManip()
+ : InputStream(nullptr)
+{
+}
+
+// Opens `fname` read-only (adding DirectAligned when `direct` is set). If DoClose()
+// reports a nonzero result, that result is returned and nothing is opened.
+int TInputFileManip::Open(const char* fname, bool direct) {
+ int ret;
+ return (ret = DoClose()) ? ret : DoStreamOpen(TFile(fname, RdOnly | (direct ? DirectAligned : EOpenMode())));
+}
+
+// Attaches an existing stream.
+// NOTE(review): the address is stored in the owning InputStream holder, so it looks like
+// the manipulator will destroy a stream that was passed by reference — confirm.
+int TInputFileManip::Open(IInputStream& input) {
+ int ret;
+ return (ret = DoClose()) ? ret : DoStreamOpen(&input);
+}
+
+// Takes ownership of `input` and attaches it.
+int TInputFileManip::Open(TAutoPtr<IInputStream> input) {
+ int ret;
+ return (ret = DoClose()) ? ret : DoStreamOpen(input.Release());
+}
+
+// Attaches an already constructed TFile. If DoClose() reports a nonzero result, that
+// result is returned and nothing is attached. Otherwise the outcome of opening the
+// stream is returned (e.g. MBDB_OPEN_ERROR when `file` is not open), in line with
+// Open(const char*), instead of unconditionally reporting success.
+int TInputFileManip::Init(const TFile& file) {
+    if (int ret = DoClose())
+        return ret;
+    return DoStreamOpen(file);
+}
+
+// Closes the stream and file; always returns 0 (the DoClose() result is ignored).
+int TInputFileManip::Close() {
+ DoClose();
+ return 0;
+}
+
+// Reads via InputStream->Load(). Returns -1 when no stream is open.
+ssize_t TInputFileManip::Read(void* buf, unsigned len) {
+ if (!IsStreamOpen())
+ return -1;
+ return InputStream->Load(buf, len);
+}
+
+// Default stream factory: an unbuffered file input over `file`. The caller owns the result.
+IInputStream* TInputFileManip::CreateStream(const TFile& file) {
+ return new TUnbufferedFileInput(file);
+}
+
+// Zero-initializes all bookkeeping fields; Term() then resets the mapping state.
+TMappedInputPageFile::TMappedInputPageFile()
+ : Pagesize(0)
+ , Error(0)
+ , Pagenum(0)
+ , Recordsig(0)
+ , Open(false)
+{
+ Term();
+}
+
+// Releases the mapping.
+TMappedInputPageFile::~TMappedInputPageFile() {
+ Term();
+}
+
+// Maps `fname` and validates its meta page.
+//   recsig       - record signature the file is expected to carry
+//   gotRecordSig - if non-null, receives the signature found in the meta page
+//                  (only written when the file is large enough to hold a meta page)
+//   (bool)       - unused
+// Returns 0 on success. On failure returns MBDB_BAD_FILE_SIZE, MBDB_BAD_METAPAGE or
+// MBDB_BAD_RECORDSIG, stores the code in Error and releases the mapping.
+int TMappedInputPageFile::Init(const char* fname, ui32 recsig, ui32* gotRecordSig, bool) {
+    // A fresh Init() must not inherit the outcome of an earlier failed attempt.
+    Error = 0;
+    Mappedfile.init(fname);
+    Open = true;
+
+    // Check the size before dereferencing the meta page: a shorter file does not
+    // contain one.
+    size_t fsize = Mappedfile.getSize();
+    if (fsize < METASIZE) {
+        Term();
+        return Error = MBDB_BAD_FILE_SIZE;
+    }
+
+    TDatMetaPage* meta = (TDatMetaPage*)Mappedfile.getData();
+    if (gotRecordSig)
+        *gotRecordSig = meta->RecordSig;
+
+    if (meta->MetaSig != METASIG)
+        Error = MBDB_BAD_METAPAGE;
+    else if (meta->RecordSig != recsig)
+        Error = MBDB_BAD_RECORDSIG;
+
+    fsize -= METASIZE;
+    // A zero page size would divide by zero below; treat it like any other size mismatch.
+    if (!Error && (!meta->PageSize || fsize % meta->PageSize))
+        Error = MBDB_BAD_FILE_SIZE;
+
+    if (Error) {
+        // Release the mapping and clear Open so GetData()/GetSize() stop exposing it.
+        Term();
+        return Error;
+    }
+
+    Pagenum = (int)(fsize / meta->PageSize);
+    Pagesize = meta->PageSize;
+    Recordsig = meta->RecordSig;
+    return Error;
+}
+
+// Releases the mapping and marks the file as closed. Always returns 0; Error and the
+// cached page fields are left untouched.
+int TMappedInputPageFile::Term() {
+ Mappedfile.term();
+ Open = false;
+ return 0;
+}
+
+// Starts with no stream attached.
+TOutputFileManip::TOutputFileManip()
+ : OutputStream(nullptr)
+{
+}
+
+// Opens `fname` for output with `mode`. Returns MBDB_ALREADY_INITIALIZED if a stream is
+// already open. An existing file is unlinked first; an unlink failure other than ENOENT
+// is reported as MBDB_OPEN_ERROR unless the name starts with "/dev/std". A TFileError
+// thrown while opening is also reported as MBDB_OPEN_ERROR. The DoStreamOpen() result
+// is not inspected.
+int TOutputFileManip::Open(const char* fname, EOpenMode mode) {
+ if (IsStreamOpen()) {
+ return MBDB_ALREADY_INITIALIZED; // should it be closed as TInputFileManip
+ }
+
+ try {
+ if (unlink(fname) && errno != ENOENT) {
+ if (strncmp(fname, "/dev/std", 8))
+ return MBDB_OPEN_ERROR;
+ }
+ TFile file(fname, mode);
+ DoStreamOpen(file);
+ } catch (const TFileError&) {
+ return MBDB_OPEN_ERROR;
+ }
+ return 0;
+}
+
+// Attaches an existing stream; MBDB_ALREADY_INITIALIZED if one is already open.
+// NOTE(review): the address is stored in the owning OutputStream holder, so it looks like
+// the manipulator will destroy a stream that was passed by reference — confirm.
+int TOutputFileManip::Open(IOutputStream& output) {
+ if (IsStreamOpen())
+ return MBDB_ALREADY_INITIALIZED;
+ DoStreamOpen(&output);
+ return 0;
+}
+
+// Takes ownership of `output` and attaches it; MBDB_ALREADY_INITIALIZED if a stream is
+// already open.
+int TOutputFileManip::Open(TAutoPtr<IOutputStream> output) {
+ if (IsStreamOpen())
+ return MBDB_ALREADY_INITIALIZED;
+ DoStreamOpen(output.Release());
+ return 0;
+}
+
+// Attaches an already constructed TFile. Returns MBDB_ALREADY_INITIALIZED if a stream is
+// already open; otherwise returns the outcome of opening the stream (e.g.
+// MBDB_OPEN_ERROR when `file` is not open) instead of unconditionally reporting success.
+int TOutputFileManip::Init(const TFile& file) {
+    if (IsStreamOpen())
+        return MBDB_ALREADY_INITIALIZED; // should it be closed as TInputFileManip
+    return DoStreamOpen(file);
+}
+
+// Switches output to `newfname`: the new file is opened (truncating an existing one)
+// before the current stream is closed. Returns MBDB_NOT_INITIALIZED when nothing is
+// open and MBDB_OPEN_ERROR when a TFileError is thrown.
+int TOutputFileManip::Rotate(const char* newfname) {
+ if (!IsStreamOpen()) {
+ return MBDB_NOT_INITIALIZED;
+ }
+
+ try {
+ TFile file(newfname, WrOnly | OpenAlways | TruncExisting | ARW | AWOther);
+ DoClose();
+ DoStreamOpen(file);
+ } catch (const TFileError&) {
+ return MBDB_OPEN_ERROR;
+ }
+ return 0;
+}
+
+// Closes the stream and file; always returns 0 (the DoClose() result is ignored).
+int TOutputFileManip::Close() {
+ DoClose();
+ return 0;
+}
+
+// Writes `len` bytes to the stream and returns `len`; -1 when no stream is open.
+int TOutputFileManip::Write(const void* buf, unsigned len) {
+ if (!IsStreamOpen())
+ return -1;
+ OutputStream->Write(buf, len);
+ return len;
+}
+
+// Default stream factory: an unbuffered file output over `file`. The caller owns the result.
+IOutputStream* TOutputFileManip::CreateStream(const TFile& file) {
+ return new TUnbufferedFileOutput(file);
+}
diff --git a/library/cpp/microbdb/file.h b/library/cpp/microbdb/file.h
new file mode 100644
index 0000000000..f7c7818375
--- /dev/null
+++ b/library/cpp/microbdb/file.h
@@ -0,0 +1,225 @@
+#pragma once
+
+#include "header.h"
+
+#include <library/cpp/deprecated/mapped_file/mapped_file.h>
+
+#include <util/generic/noncopyable.h>
+#include <util/stream/file.h>
+#include <util/system/filemap.h>
+
+#define FS_BLOCK_SIZE 512
+
+// Common base of the input/output manipulators: holds the TFile and a flag telling
+// whether the current source is file-based (and therefore seekable).
+class TFileManipBase {
+protected:
+ TFileManipBase();
+
+ virtual ~TFileManipBase() {
+ }
+
+ // Seeks the file; returns -1 when isStreamOpen is false.
+ i64 DoSeek(i64 offset, int whence, bool isStreamOpen);
+
+ // Adopts `file`; returns 0 or MBDB_OPEN_ERROR.
+ int DoFileOpen(const TFile& file);
+
+ // Closes the file if it is open.
+ int DoFileClose();
+
+ // Nonzero when the file is considered a regular file (platform-dependent check).
+ int IsFileBased() const;
+
+ inline void SetFileBased(bool fileBased) {
+ FileBased = fileBased;
+ }
+
+ // Current file position; asserts (Y_ASSERT) that the source is file-based.
+ inline i64 DoGetPosition() const {
+ Y_ASSERT(FileBased);
+ return File.GetPosition();
+ }
+
+ // File length, or -1 when the source is not file-based.
+ inline i64 DoGetLength() const {
+ return (FileBased) ? File.GetLength() : -1;
+ }
+
+ // Aborts (Y_VERIFY) unless the source is file-based.
+ inline void VerifyRandomAccess() const {
+ Y_VERIFY(FileBased, "non-file stream can not be accessed randomly");
+ }
+
+ // Current file position without the file-based assertion.
+ inline i64 GetPosition() const {
+ return (i64)File.GetPosition();
+ }
+
+private:
+ TFile File;
+ bool FileBased;
+};
+
+// Input manipulator: holds the IInputStream that data is read from and, when opened
+// from a TFile, the file behind it (kept in the base class).
+class TInputFileManip: public TFileManipBase {
+public:
+    using TFileManipBase::GetPosition;
+
+    TInputFileManip();
+
+    // Opens a file by name; `direct` adds DirectAligned to the open mode.
+    int Open(const char* fname, bool direct = false);
+
+    // Attaches an existing stream.
+    int Open(IInputStream& input);
+
+    // Takes ownership of `input` and attaches it.
+    int Open(TAutoPtr<IInputStream> input);
+
+    // Attaches an already constructed TFile.
+    int Init(const TFile& file);
+
+    int Close();
+
+    // Reads up to `len` bytes; -1 when no stream is open.
+    ssize_t Read(void* buf, unsigned len);
+
+    inline bool IsOpen() const {
+        return IsStreamOpen();
+    }
+
+    inline i64 GetLength() const {
+        return DoGetLength();
+    }
+
+    inline i64 Seek(i64 offset, int whence) {
+        return DoSeek(offset, whence, IsStreamOpen());
+    }
+
+    // Same as Seek() here; derived classes give it a different meaning.
+    inline i64 RealSeek(i64 offset, int whence) {
+        return Seek(offset, whence);
+    }
+
+protected:
+    inline bool IsStreamOpen() const {
+        return !!InputStream;
+    }
+
+    // Installs `input` as the current stream and records whether it is file-based.
+    inline int DoStreamOpen(IInputStream* input, bool fileBased = false) {
+        InputStream.Reset(input);
+        SetFileBased(fileBased);
+        return 0;
+    }
+
+    // Adopts `file` and, if that succeeds, installs a stream created over it.
+    inline int DoStreamOpen(const TFile& file) {
+        int ret;
+        return (ret = DoFileOpen(file)) ? ret : DoStreamOpen(CreateStream(file), IsFileBased());
+    }
+
+    virtual IInputStream* CreateStream(const TFile& file);
+
+    // Destroys the stream and closes the file. Returns the DoFileClose() code, or 0
+    // when no stream was open. The return type is int rather than bool so that the
+    // MBDB code reaches callers such as Open() instead of collapsing to 1.
+    inline int DoClose() {
+        if (IsStreamOpen()) {
+            InputStream.Destroy();
+            return DoFileClose();
+        }
+        return 0;
+    }
+
+    THolder<IInputStream> InputStream;
+};
+
+// Read-only page file accessed through a memory mapping. Accessors return cached
+// values filled in by Init().
+class TMappedInputPageFile: private TNonCopyable {
+public:
+ TMappedInputPageFile();
+
+ ~TMappedInputPageFile();
+
+ inline int GetError() const {
+ return Error;
+ }
+
+ inline size_t GetPageSize() const {
+ return Pagesize;
+ }
+
+ // Returns Pagenum, the page count computed by Init().
+ inline int GetLastPage() const {
+ return Pagenum;
+ }
+
+ inline ui32 GetRecordSig() const {
+ return Recordsig;
+ }
+
+ inline bool IsOpen() const {
+ return Open;
+ }
+
+ // Mapped data, or nullptr when not open.
+ inline char* GetData() const {
+ return Open ? (char*)Mappedfile.getData() : nullptr;
+ }
+
+ // Mapped size, or 0 when not open.
+ inline size_t GetSize() const {
+ return Open ? Mappedfile.getSize() : 0;
+ }
+
+protected:
+ // Maps `fname` and validates its meta page against `recsig`; `direct` is unused by
+ // the implementation.
+ int Init(const char* fname, ui32 recsig, ui32* gotRecordSig = nullptr, bool direct = false);
+
+ // Releases the mapping.
+ int Term();
+
+ TMappedFile Mappedfile;
+ size_t Pagesize;
+ int Error;
+ int Pagenum;
+ ui32 Recordsig;
+ bool Open;
+};
+
+// Output manipulator: holds the IOutputStream that data is written to and, when
+// opened from a TFile, the file behind it (kept in the base class).
+class TOutputFileManip: public TFileManipBase {
+public:
+    TOutputFileManip();
+
+    // Opens a file by name, removing any existing file first.
+    int Open(const char* fname, EOpenMode mode = WrOnly | CreateAlways | ARW | AWOther);
+
+    // Attaches an existing stream.
+    int Open(IOutputStream& output);
+
+    // Takes ownership of `output` and attaches it.
+    int Open(TAutoPtr<IOutputStream> output);
+
+    // Attaches an already constructed TFile.
+    int Init(const TFile& file);
+
+    // Closes the current output and continues writing into `newfname`.
+    int Rotate(const char* newfname);
+
+    // Writes `len` bytes; -1 when no stream is open.
+    int Write(const void* buf, unsigned len);
+
+    int Close();
+
+    inline bool IsOpen() const {
+        return IsStreamOpen();
+    }
+
+    inline i64 GetLength() const {
+        return DoGetLength();
+    }
+
+    inline i64 Seek(i64 offset, int whence) {
+        return DoSeek(offset, whence, IsStreamOpen());
+    }
+
+    // Same as Seek() here; derived classes give it a different meaning.
+    inline i64 RealSeek(i64 offset, int whence) {
+        return Seek(offset, whence);
+    }
+
+protected:
+    inline bool IsStreamOpen() const {
+        return !!OutputStream;
+    }
+
+    // Installs `output` as the current stream and records whether it is file-based.
+    inline int DoStreamOpen(IOutputStream* output, bool fileBased = false) {
+        OutputStream.Reset(output);
+        SetFileBased(fileBased);
+        return 0;
+    }
+
+    // Adopts `file` and, if that succeeds, installs a stream created over it.
+    inline int DoStreamOpen(const TFile& file) {
+        int ret;
+        return (ret = DoFileOpen(file)) ? ret : DoStreamOpen(CreateStream(file), true);
+    }
+
+    virtual IOutputStream* CreateStream(const TFile& file);
+
+    // Destroys the stream and closes the file. Returns the DoFileClose() code, or 0
+    // when no stream was open. The return type is int rather than bool so that the
+    // MBDB code is not collapsed to 1.
+    inline int DoClose() {
+        if (IsStreamOpen()) {
+            OutputStream.Destroy();
+            return DoFileClose();
+        }
+        return 0;
+    }
+
+    THolder<IOutputStream> OutputStream;
+};
diff --git a/library/cpp/microbdb/hashes.h b/library/cpp/microbdb/hashes.h
new file mode 100644
index 0000000000..bfd113c3ba
--- /dev/null
+++ b/library/cpp/microbdb/hashes.h
@@ -0,0 +1,250 @@
+#pragma once
+
+#include <library/cpp/on_disk/st_hash/static_hash.h>
+#include <util/system/sysstat.h>
+#include <util/stream/mem.h>
+#include <util/string/printf.h>
+#include <library/cpp/deprecated/fgood/fgood.h>
+
+#include "safeopen.h"
+
+/** This file currently implements creation of mappable read-only hash file.
+ Basic usage of these "static hashes" is defined in util/static_hash.h (see docs there).
+ Additional useful wrappers are available in util/static_hash_map.h
+
+ There are two ways to create mappable hash file:
+
+ A) Fill a THashMap/set structure in RAM, then dump it to disk.
+ This is usually done by save_hash_to_file* functions defined in static_hash.h
+ (see description in static_hash.h).
+
+ B) Prepare all data using external sorter, then create hash file straight on disk.
+ This approach is necessary when there isn't enough RAM to hold entire original THashMap.
+ Implemented in this file as TStaticHashBuilder class.
+
+ Current implementation's major drawback is that the size of the hash must be estimated
+ before the hash is built (bucketCount), which is not always possible.
+ Separate implementation with two sort passes is yet to be done.
+
+ Another problem is that maximum stored size of the element (maxRecSize) must also be
+ known in advance, because we use TDatSorterMemo, etc.
+ */
+
+/// Temporary sorter record: a hash value, the payload length and the payload
+/// itself, which starts at Buf and extends past the nominal end of the struct.
+template <class SizeType>
+struct TSthashTmpRec {
+    SizeType HashVal;
+    SizeType RecSize;
+    char Buf[1];
+
+    /// Number of bytes from the start of the record to the end of its payload.
+    size_t SizeOf() const {
+        const char* const recordBegin = (const char*)this;
+        const char* const payloadEnd = Buf + RecSize;
+        return payloadEnd - recordBegin;
+    }
+
+    /// Records are ordered by HashVal only.
+    bool operator<(const TSthashTmpRec& other) const {
+        return HashVal < other.HashVal;
+    }
+
+    // The signature differs by sizeof(SizeType), so records of different widths do not mix.
+    static const ui32 RecordSig = 20100124 + sizeof(SizeType) - 4;
+};
+
+/// Merger policy that discards the old record and keeps a copy of the new one.
+template <typename T>
+struct TReplaceMerger {
+    T operator()(const T& oldRecord, const T& newRecord) const {
+        Y_UNUSED(oldRecord);
+        T merged(newRecord);
+        return merged;
+    }
+};
+
+/** TStaticHashBuilder template parameters:
+ HashType - THashMap map/set type for which we construct corresponding mappable hash;
+ SizeType - type used to store offsets and length in resulting hash;
+ MergerType - type of object to process records with equal key (see TReplaceMerger for example);
+ */
+
+template <class HashType, class SizeType, class MergerType = TReplaceMerger<typename HashType::mapped_type>>
+struct TStaticHashBuilder {
+    const size_t SrtIOPageSz;
+    const size_t WrBufSz;
+    typedef TSthashTmpRec<SizeType> TIoRec;
+    typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, SizeType> TKeySaver;
+    typedef typename HashType::value_type TValueType;
+    typedef typename HashType::mapped_type TMappedType;
+    typedef typename HashType::key_type TKeyType;
+
+    TDatSorterMemo<TIoRec, TCompareByLess> Srt;
+    TBuffer IoRec, CurrentBlockRecs;
+    TKeySaver KeySaver;
+    typename HashType::hasher Hasher;
+    typename HashType::key_equal Equals;
+    MergerType merger;
+    TString HashFileName;
+    TString OurTmpDir;
+    size_t BucketCount;
+    int FreeBits;
+
+    // memSz is the Sorter buffer size;
+    // maxRecSize is the maximum size (as reported by size_for_st) of our record(s)
+    TStaticHashBuilder(size_t memSz, size_t maxRecSize)
+        : SrtIOPageSz((maxRecSize * 16 + 65535) & ~size_t(65535)) // 16 records, rounded up to 64 KiB
+        , WrBufSz(memSz / 16 >= SrtIOPageSz ? memSz / 16 : SrtIOPageSz)
+        , Srt("unused", memSz, SrtIOPageSz, WrBufSz, 0)
+        , IoRec(sizeof(TIoRec) + maxRecSize)
+        , CurrentBlockRecs(sizeof(TIoRec) + maxRecSize)
+        , BucketCount(0)
+        , FreeBits(0)
+    {
+    }
+
+    ~TStaticHashBuilder() {
+        Close();
+    }
+
+    // if tmpDir is supplied, it must exist;
+    // bucketCount should be HashBucketCount() of the (estimated) element count
+    void Open(const char* fname, size_t bucketCount, const char* tmpDir = nullptr) {
+        if (!tmpDir)
+            tmpDir = ~(OurTmpDir = Sprintf("%s.temp", fname));
+        Mkdir(tmpDir, MODE0775);
+        Srt.Open(tmpDir);
+        HashFileName = fname;
+        BucketCount = bucketCount;
+        // bitCount = number of bits needed to hold a bucket index;
+        // the remaining FreeBits low bits of the sort key hold part of the hash.
+        int bitCount = 0;
+        while (((size_t)1 << bitCount) <= BucketCount && bitCount < int(8 * sizeof(size_t)))
+            ++bitCount;
+        FreeBits = 8 * sizeof(size_t) - bitCount;
+    }
+
+    // Serializes rec into the shared IoRec buffer and hands it to the sorter.
+    void Push(const TValueType& rec) {
+        TIoRec* ioRec = MakeIoRec(rec);
+        Srt.Push(ioRec);
+    }
+
+    // Builds the temporary sorter record for rec inside the shared IoRec buffer.
+    // The sort key keeps the bucket index in the high bits and the low FreeBits
+    // bits of (hash / BucketCount) below it, so sorting groups records by bucket.
+    // The returned pointer is only valid until the next MakeIoRec call.
+    TIoRec* MakeIoRec(const TValueType& rec) {
+        TIoRec* ioRec = (TIoRec*)IoRec.Data();
+        // The shift must be done in size_t: FreeBits is normally >= 32, and
+        // shifting the int literal 1 by that much is undefined behaviour.
+        // NOTE(review): the key is computed in size_t but stored in SizeType;
+        // this looks like it assumes sizeof(SizeType) == sizeof(size_t) - confirm.
+        size_t mask = (size_t(1) << FreeBits) - 1;
+        size_t hash = Hasher(rec.first);
+        ioRec->HashVal = ((hash % BucketCount) << FreeBits) + ((hash / BucketCount) & mask);
+
+        TMemoryOutput output(ioRec->Buf, IoRec.Capacity() - offsetof(TIoRec, Buf));
+        KeySaver.SaveRecord(&output, rec);
+        ioRec->RecSize = output.Buf() - ioRec->Buf;
+        return ioRec;
+    }
+
+    // Decodes the record occupying the last newRecordSize bytes of CurrentBlockRecs
+    // and folds it into records: if a record with an equal key is already there,
+    // the values are combined with merger and true is returned; otherwise the new
+    // pair is appended and false is returned.
+    bool Merge(TVector<std::pair<TKeyType, TMappedType>>& records, size_t newRecordSize) {
+        TSthashIterator<const TKeyType, const TMappedType, typename HashType::hasher,
+                        typename HashType::key_equal>
+            newPtr(CurrentBlockRecs.End() - newRecordSize);
+        for (size_t i = 0; i < records.size(); ++i) {
+            if (newPtr.KeyEquals(Equals, records[i].first)) {
+                TMappedType oldValue = records[i].second;
+                TMappedType newValue = newPtr.Value();
+                newValue = merger(oldValue, newValue);
+                records[i].second = newValue;
+                return true;
+            }
+        }
+        records.push_back(std::make_pair(newPtr.Key(), newPtr.Value()));
+        return false;
+    }
+
+    // Writes rec_size bytes to f and advances the running data offset.
+    void PutRecord(const char* buf, size_t rec_size, TFILEPtr& f, SizeType& cur_off) {
+        f.fsput(buf, rec_size);
+        cur_off += rec_size;
+    }
+
+    // Sorts everything pushed so far and writes the hash file: header, bucket
+    // offset table and records. Records sharing a sort key are decoded and merged
+    // by key before being written.
+    void Finish() {
+        Srt.Sort();
+        // We use variant 1.
+        // Variant 1: read sorter once, write records, fseeks to write buckets
+        // (this doesn't allow fname to be stdout)
+        // Variant 2: read sorter (probably temp. file) twice: write buckets, then write records
+        // (this allows fname to be stdout but seems to be longer)
+        TFILEPtr f(HashFileName, "wb");
+        setvbuf(f, nullptr, _IOFBF, WrBufSz);
+        TVector<SizeType> bucketsBuf(WrBufSz, 0);
+        // prepare header (note: this code must be unified with save_stl.h)
+        typedef sthashtable_nvm_sv<typename HashType::hasher, typename HashType::key_equal, SizeType> sv_type;
+        sv_type sv = {Hasher, Equals, BucketCount, 0, 0};
+        // to do: m.b. use just the size of corresponding object?
+        SizeType cur_off = sizeof(sv_type) +
+                           (sv.num_buckets + 1) * sizeof(SizeType);
+        SizeType bkt_wroff = sizeof(sv_type), bkt_bufpos = 0, prev_bkt = 0, prev_hash = (SizeType)-1;
+        bucketsBuf[bkt_bufpos++] = cur_off;
+        // it might be better to write many zeroes here
+        f.seek(cur_off, SEEK_SET);
+        TVector<std::pair<TKeyType, TMappedType>> currentBlock;
+        bool emptyFile = true;
+        size_t prevRecSize = 0;
+        // seek forward
+        while (true) {
+            const TIoRec* rec = Srt.Next();
+            if (currentBlock.empty() && !emptyFile) {
+                if (rec && prev_hash == rec->HashVal) {
+                    Merge(currentBlock, prevRecSize);
+                } else {
+                    // if there is only one record with this hash, don't recode it, just write
+                    PutRecord(CurrentBlockRecs.Data(), prevRecSize, f, cur_off);
+                    sv.num_elements++;
+                }
+            }
+            if (!rec || prev_hash != rec->HashVal) {
+                // flush the merged records collected for the previous sort key
+                for (size_t i = 0; i < currentBlock.size(); ++i) {
+                    TIoRec* ioRec = MakeIoRec(TValueType(currentBlock[i]));
+                    PutRecord(ioRec->Buf, ioRec->RecSize, f, cur_off);
+                }
+                sv.num_elements += currentBlock.size();
+                currentBlock.clear();
+                CurrentBlockRecs.Clear();
+                if (rec) {
+                    prev_hash = rec->HashVal;
+                }
+            }
+            // note: prev_bkt's semantics here is 'cur_bkt - 1', thus we are actually cycling
+            // until cur_bkt == rec->HashVal *inclusively*
+            while (!rec || prev_bkt != (rec->HashVal >> FreeBits)) {
+                bucketsBuf[bkt_bufpos++] = cur_off;
+                if (bkt_bufpos == bucketsBuf.size()) {
+                    // bucket buffer is full: write it at its place in the table, then return to the data
+                    f.seek(bkt_wroff, SEEK_SET);
+                    size_t sz = bkt_bufpos * sizeof(bucketsBuf[0]);
+                    if (f.write(bucketsBuf.begin(), 1, sz) != sz)
+                        throw yexception() << "could not write " << sz << " bytes to " << ~HashFileName;
+                    bkt_wroff += sz;
+                    bkt_bufpos = 0;
+                    f.seek(cur_off, SEEK_SET);
+                }
+                prev_bkt++;
+                if (!rec) {
+                    break;
+                }
+                assert(prev_bkt < BucketCount);
+            }
+            if (!rec) {
+                break;
+            }
+            emptyFile = false;
+            CurrentBlockRecs.Append(rec->Buf, rec->RecSize);
+            if (!currentBlock.empty()) {
+                Merge(currentBlock, rec->RecSize);
+            } else {
+                prevRecSize = rec->RecSize;
+            }
+        }
+        // finish buckets table
+        f.seek(bkt_wroff, SEEK_SET);
+        size_t sz = bkt_bufpos * sizeof(bucketsBuf[0]);
+        if (sz && f.write(bucketsBuf.begin(), 1, sz) != sz)
+            throw yexception() << "could not write " << sz << " bytes to " << ~HashFileName;
+        bkt_wroff += sz;
+        for (; prev_bkt < BucketCount; prev_bkt++)
+            f.fput(cur_off);
+        // finally write header
+        sv.data_end_off = cur_off;
+        f.seek(0, SEEK_SET);
+        f.fput(sv);
+        f.close();
+    }
+
+    // Closes the sorter and removes the temporary directory if Open() created it.
+    void Close() {
+        Srt.Close();
+        if (+OurTmpDir)
+            rmdir(~OurTmpDir);
+    }
+};
diff --git a/library/cpp/microbdb/header.cpp b/library/cpp/microbdb/header.cpp
new file mode 100644
index 0000000000..f4511d6fb6
--- /dev/null
+++ b/library/cpp/microbdb/header.cpp
@@ -0,0 +1,91 @@
+#include "header.h"
+
+#include <util/stream/output.h>
+#include <util/stream/format.h>
+
+TString ToString(EMbdbErrors error) {
+ TString ret;
+ switch (error) {
+ case MBDB_ALREADY_INITIALIZED:
+ ret = "already initialized";
+ break;
+ case MBDB_NOT_INITIALIZED:
+ ret = "not initialized";
+ break;
+ case MBDB_BAD_DESCRIPTOR:
+ ret = "bad descriptor";
+ break;
+ case MBDB_OPEN_ERROR:
+ ret = "open error";
+ break;
+ case MBDB_READ_ERROR:
+ ret = "read error";
+ break;
+ case MBDB_WRITE_ERROR:
+ ret = "write error";
+ break;
+ case MBDB_CLOSE_ERROR:
+ ret = "close error";
+ break;
+ case MBDB_EXPECTED_EOF:
+ ret = "expected eof";
+ break;
+ case MBDB_UNEXPECTED_EOF:
+ ret = "unxepected eof";
+ break;
+ case MBDB_BAD_FILENAME:
+ ret = "bad filename";
+ break;
+ case MBDB_BAD_METAPAGE:
+ ret = "bad metapage";
+ break;
+ case MBDB_BAD_RECORDSIG:
+ ret = "bad recordsig";
+ break;
+ case MBDB_BAD_FILE_SIZE:
+ ret = "bad file size";
+ break;
+ case MBDB_BAD_PAGESIG:
+ ret = "bad pagesig";
+ break;
+ case MBDB_BAD_PAGESIZE:
+ ret = "bad pagesize";
+ break;
+ case MBDB_BAD_PARM:
+ ret = "bad parm";
+ break;
+ case MBDB_BAD_SYNC:
+ ret = "bad sync";
+ break;
+ case MBDB_PAGE_OVERFLOW:
+ ret = "page overflow";
+ break;
+ case MBDB_NO_MEMORY:
+ ret = "no memory";
+ break;
+ case MBDB_MEMORY_LEAK:
+ ret = "memory leak";
+ break;
+ case MBDB_NOT_SUPPORTED:
+ ret = "not supported";
+ break;
+ default:
+ ret = "unknown";
+ break;
+ }
+ return ret;
+}
+
+/// Builds a diagnostic line of the form
+///   <text> '<path>': <error description>[. Expected RecordSig: X[, got: Y]]. Last system error text: ...
+/// The path part is omitted for an empty path. The record signature part appears
+/// only when an expected signature is given and differs from the one found; the
+/// ", got:" part additionally requires a non-zero found signature.
+TString ErrorMessage(int error, const TString& text, const TString& path, ui32 recordSig, ui32 gotRecordSig) {
+    TStringStream message;
+    message << text;
+    if (!path.empty()) {
+        message << " '" << path << "'";
+    }
+    message << ": " << ToString(static_cast<EMbdbErrors>(error));
+
+    const bool signatureMismatch = recordSig && recordSig != gotRecordSig;
+    if (signatureMismatch) {
+        message << ". Expected RecordSig: " << Hex(recordSig, HF_ADDX);
+        if (gotRecordSig) {
+            message << ", got: " << Hex(gotRecordSig, HF_ADDX);
+        }
+    }
+
+    message << ". Last system error text: " << LastSystemErrorText();
+    return message.Str();
+}
diff --git a/library/cpp/microbdb/header.h b/library/cpp/microbdb/header.h
new file mode 100644
index 0000000000..0951d610ea
--- /dev/null
+++ b/library/cpp/microbdb/header.h
@@ -0,0 +1,159 @@
+#pragma once
+
+#include <util/system/defaults.h>
+#include <util/generic/typetraits.h>
+#include <util/generic/string.h>
+#include <util/str_stl.h>
+
+#include <stdio.h>
+
+#define METASIZE (1u << 12)
+#define METASIG 0x12345678u
+#define PAGESIG 0x87654321u
+
+// Error codes used by the microbdb routines; numbering starts at 200.
+// ToString(EMbdbErrors) maps each code to a short description.
+// NOTE(review): presumably the 200+ range is meant to stay clear of errno values,
+// which some routines return through the same int - confirm.
+enum EMbdbErrors {
+ MBDB_ALREADY_INITIALIZED = 200,
+ MBDB_NOT_INITIALIZED = 201,
+ MBDB_BAD_DESCRIPTOR = 202,
+ MBDB_OPEN_ERROR = 203,
+ MBDB_READ_ERROR = 204,
+ MBDB_WRITE_ERROR = 205,
+ MBDB_CLOSE_ERROR = 206,
+ MBDB_EXPECTED_EOF = 207,
+ MBDB_UNEXPECTED_EOF = 208,
+ MBDB_BAD_FILENAME = 209,
+ MBDB_BAD_METAPAGE = 210,
+ MBDB_BAD_RECORDSIG = 211,
+ MBDB_BAD_FILE_SIZE = 212,
+ MBDB_BAD_PAGESIG = 213,
+ MBDB_BAD_PAGESIZE = 214,
+ MBDB_BAD_PARM = 215,
+ MBDB_BAD_SYNC = 216,
+ MBDB_PAGE_OVERFLOW = 217,
+ MBDB_NO_MEMORY = 218,
+ MBDB_MEMORY_LEAK = 219,
+ MBDB_NOT_SUPPORTED = 220
+};
+
+TString ToString(EMbdbErrors error);
+TString ErrorMessage(int error, const TString& text, const TString& path = TString(), ui32 recordSig = 0, ui32 gotRecordSig = 0);
+
+// Page payload formats, stored in TDatPage::Format.
+// MBDB_FORMAT_NULL does not fit the 2-bit Format field; TInputRecordIterator
+// uses it as the "no reader selected yet" value.
+enum EPageFormat {
+ MBDB_FORMAT_RAW = 0,
+ MBDB_FORMAT_COMPRESSED = 1,
+ MBDB_FORMAT_NULL = 255
+};
+
+// Compression algorithm identifiers.
+// NOTE(review): presumably the values stored in TCompressedPage::Algorithm - confirm.
+enum ECompressionAlgorithm {
+ MBDB_COMPRESSION_ZLIB = 1,
+ MBDB_COMPRESSION_FASTLZ = 2,
+ MBDB_COMPRESSION_SNAPPY = 3
+};
+
+// Header at the start of the METASIZE-byte metapage that opens a data file.
+struct TDatMetaPage {
+ ui32 MetaSig; //!< must equal METASIG
+ ui32 RecordSig; //!< signature of the record type, checked against the reader's expectation
+ ui32 PageSize; //!< size in bytes of every data page following the metapage
+};
+
+// Header at the start of every data page.
+struct TDatPage {
+ ui32 RecNum; //!< number of records on this page
+ ui32 PageSig; //!< must equal PAGESIG
+ ui32 Format : 2; //!< one of EPageFormat
+ ui32 Reserved : 30;
+};
+
+/// Additional page header with compression info
+struct TCompressedPage {
+ ui32 BlockCount;
+ ui32 Algorithm : 4;
+ ui32 Version : 4;
+ ui32 Reserved : 24;
+};
+
+namespace NMicroBDB {
+ /// Header of compressed block
+ struct TCompressedHeader {
+ ui32 Compressed;
+ ui32 Original; /// original size of block
+ ui32 Count; /// number of records in block
+ ui32 Reserved;
+ };
+
+ // Generates THasAssertValid<T>, used below to detect an AssertValid member.
+ Y_HAS_MEMBER(AssertValid);
+
+ // Primary template: T has no AssertValid member, so validation is a no-op.
+ template <typename T, bool TVal>
+ struct TAssertValid {
+ void operator()(const T*) {
+ }
+ };
+
+ // Specialization for types that do have AssertValid: forwards to it.
+ template <typename T>
+ struct TAssertValid<T, true> {
+ void operator()(const T* rec) {
+ return rec->AssertValid();
+ }
+ };
+
+ // Calls rec->AssertValid() when T provides it, otherwise does nothing.
+ template <typename T>
+ void AssertValid(const T* rec) {
+ return NMicroBDB::TAssertValid<T, NMicroBDB::THasAssertValid<T>::value>()(rec);
+ }
+
+ // Generates THasSizeOf<T>, used to detect a SizeOf member.
+ Y_HAS_MEMBER(SizeOf);
+
+ template <typename T, bool TVal>
+ struct TGetSizeOf;
+
+ // T has a SizeOf member: the record reports its own size.
+ template <typename T>
+ struct TGetSizeOf<T, true> {
+ size_t operator()(const T* rec) {
+ return rec->SizeOf();
+ }
+ };
+
+ // T has no SizeOf member: fall back to sizeof(T).
+ template <typename T>
+ struct TGetSizeOf<T, false> {
+ size_t operator()(const T*) {
+ return sizeof(T);
+ }
+ };
+
+ // Returns the address of the first record on the page, or nullptr for a
+ // page format other than raw or compressed.
+ inline char* GetFirstRecord(const TDatPage* page) {
+ switch (page->Format) {
+ case MBDB_FORMAT_RAW:
+ return (char*)page + sizeof(TDatPage);
+ case MBDB_FORMAT_COMPRESSED:
+ // The first record on a compressed page is stored uncompressed,
+ // immediately after all the headers.
+ // See TOutputRecordIterator::FlushBuffer for how pages are written.
+ return (char*)page + sizeof(TDatPage) + sizeof(TCompressedPage) + sizeof(NMicroBDB::TCompressedHeader);
+ }
+ return (char*)nullptr;
+ }
+}
+
+// Size of a record: rec->SizeOf() when T declares SizeOf, otherwise sizeof(T).
+template <typename T>
+size_t SizeOf(const T* rec) {
+ return NMicroBDB::TGetSizeOf<T, NMicroBDB::THasSizeOf<T>::value>()(rec);
+}
+
+// Upper bound on the size of a record of type T; this generic version is sizeof(T).
+// NOTE(review): presumably specialized elsewhere for variable-size records - confirm.
+template <typename T>
+size_t MaxSizeOf() {
+ return sizeof(T);
+}
+
+/// Derives the index file name from a data file name: the extension of the last
+/// path component (if any) is replaced by ".idx", otherwise ".idx" is appended.
+/// The result is written to iname, which must hold FILENAME_MAX chars.
+/// Returns 0 on success, MBDB_BAD_FILENAME for a null/empty or too long name.
+static inline int DatNameToIdx(char iname[/*FILENAME_MAX*/], const char* dname) {
+    if (dname == nullptr || *dname == '\0') {
+        return MBDB_BAD_FILENAME;
+    }
+
+    // Look for the extension only inside the last path component.
+    const char* lastComponent = strrchr(dname, '/');
+    if (lastComponent == nullptr) {
+        lastComponent = dname;
+    }
+    const char* extension = strrchr(lastComponent, '.');
+    if (extension == nullptr) {
+        extension = dname + strlen(dname);
+    }
+
+    // Five more chars are needed: ".idx" plus the terminating zero.
+    const auto stemLength = extension - dname;
+    if (stemLength > FILENAME_MAX - 5) {
+        return MBDB_BAD_FILENAME;
+    }
+
+    memcpy(iname, dname, stemLength);
+    strcpy(iname + stemLength, ".idx");
+    return 0;
+}
diff --git a/library/cpp/microbdb/heap.h b/library/cpp/microbdb/heap.h
new file mode 100644
index 0000000000..ef5a53534c
--- /dev/null
+++ b/library/cpp/microbdb/heap.h
@@ -0,0 +1,143 @@
+#pragma once
+
+#include "header.h"
+#include "extinfo.h"
+
+#include <util/generic/vector.h>
+
+#include <errno.h>
+
+///////////////////////////////////////////////////////////////////////////////
+
+/// Default comparator
+/// Compares two records given by pointer using TLess on the pointed-to values.
+template <class TVal>
+struct TCompareByLess {
+    inline bool operator()(const TVal* a, const TVal* b) const {
+        const TVal& lhs = *a;
+        const TVal& rhs = *b;
+        return TLess<TVal>()(lhs, rhs);
+    }
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+/// Merging iterator: keeps a min-heap (ordered by TCompare on Current()) of
+/// source iterators and yields their records in sorted order.
+template <class TVal, class TIterator, class TCompare = TCompareByLess<TVal>>
+class THeapIter {
+public:
+    /// (Re)initializes the heap from `count` iterator pointers.
+    /// Every iterator except the first is advanced once here; the first one is
+    /// advanced by the first call to Next(). Iterators that turn out to be empty
+    /// are dropped. Returns 0 on success or ENOMEM.
+    int Init(TIterator** iters, int count) {
+        Term();
+        if (!count)
+            return 0;
+        if (!(Heap = (TIterator**)malloc(count * sizeof(TIterator*))))
+            return ENOMEM;
+
+        Count = count;
+        count = 0;
+        while (count < Count) {
+            if (count && !(*iters)->Next()) { //here first TIterator is NOT initialized!
+                Count--;
+                iters++;
+            } else {
+                Heap[count++] = *iters++;
+            }
+        }
+        count = Count / 2;
+        while (--count > 0)     //Heap[0] is not changed!
+            Sift(count, Count); //do not try to replace this code by make_heap
+        return 0;
+    }
+
+    /// Same as above for a contiguous array of iterators.
+    int Init(TIterator* iters, int count) {
+        TVector<TIterator*> a(count);
+        for (int i = 0; i < count; ++i)
+            a[i] = &iters[i];
+        // data() rather than &a[0]: indexing an empty vector is undefined even
+        // though count == 0 is a valid request handled by the other overload.
+        return Init(a.data(), count);
+    }
+
+    THeapIter()
+        : Heap(nullptr)
+        , Count(0)
+    {
+    }
+
+    THeapIter(TIterator* a, TIterator* b)
+        : Heap(nullptr)
+        , Count(0)
+    {
+        TIterator* arr[] = {a, b};
+        if (Init(arr, 2))
+            ythrow yexception() << "can't Init THeapIter";
+    }
+
+    THeapIter(TVector<TIterator>& v)
+        : Heap(nullptr)
+        , Count(0)
+    {
+        // data() is well defined for an empty vector, unlike &v[0]
+        if (Init(v.data(), v.size())) {
+            ythrow yexception() << "can't Init THeapIter";
+        }
+    }
+
+    ~THeapIter() {
+        Term();
+    }
+
+    /// Record at the top of the heap, or nullptr when no iterators are left.
+    inline const TVal* Current() const {
+        if (!Count)
+            return nullptr;
+        return (*Heap)->Current();
+    }
+
+    /// Iterator at the top of the heap. Must not be called on an uninitialized heap.
+    inline const TIterator* CurrentIter() const {
+        return *Heap;
+    }
+
+    //when the last file ends this performs Heap[0] = Heap[0] and
+    //returns Current of an iterator at eof, so Current at eof MUST return NULL
+    //possibly this is a bug that needs fixing
+    const TVal* Next() {
+        if (!Count)
+            return nullptr;
+        if (!(*Heap)->Next())      //on the first call the uninitialized first TIterator
+            *Heap = Heap[--Count]; //will be correctly initialized
+
+        if (Count == 2) {
+            if (TCompare()(Heap[1]->Current(), Heap[0]->Current()))
+                DoSwap(Heap[1], Heap[0]);
+        } else
+            Sift(0, Count);
+
+        return Current();
+    }
+
+    inline bool GetExtInfo(typename TExtInfoType<TVal>::TResult* extInfo) const {
+        return (*Heap)->GetExtInfo(extInfo);
+    }
+
+    inline const ui8* GetExtInfoRaw(size_t* len) const {
+        return (*Heap)->GetExtInfoRaw(len);
+    }
+
+    /// Releases the heap array; the iterators themselves are not owned.
+    void Term() {
+        ::free(Heap);
+        Heap = nullptr;
+        Count = 0;
+    }
+
+protected:
+    /// Sifts Heap[node] down within Heap[0..end).
+    void Sift(int node, int end) {
+        TIterator* x = Heap[node];
+        int son;
+        for (son = 2 * node + 1; son < end; node = son, son = 2 * node + 1) {
+            if (son < (end - 1) && TCompare()(Heap[son + 1]->Current(), Heap[son]->Current()))
+                son++;
+            if (TCompare()(Heap[son]->Current(), x->Current()))
+                Heap[node] = Heap[son];
+            else
+                break;
+        }
+        Heap[node] = x;
+    }
+
+    TIterator** Heap;
+    int Count;
+};
+
+///////////////////////////////////////////////////////////////////////////////
diff --git a/library/cpp/microbdb/input.h b/library/cpp/microbdb/input.h
new file mode 100644
index 0000000000..a214ba6e8a
--- /dev/null
+++ b/library/cpp/microbdb/input.h
@@ -0,0 +1,1027 @@
+#pragma once
+
+#include "header.h"
+#include "file.h"
+#include "reader.h"
+
+#include <util/system/maxlen.h>
+#include <util/system/event.h>
+#include <util/system/thread.h>
+
+#include <thread>
+
+#include <sys/uio.h>
+
+#include <errno.h>
+
+/// Scatter read built on TFileManip::Read: fills the iovcnt buffers of iov in order.
+/// Like readv(2), it stops at the first short read, so the bytes returned always
+/// occupy the buffers contiguously from the start of iov[0].
+/// Returns the total number of bytes read, or -1 if any Read call fails.
+template <class TFileManip>
+inline ssize_t Readv(TFileManip& fileManip, const struct iovec* iov, int iovcnt) {
+    ssize_t read_count = 0;
+    for (int n = 0; n < iovcnt; n++) {
+        ssize_t last_read = fileManip.Read(iov[n].iov_base, iov[n].iov_len);
+        if (last_read < 0)
+            return -1;
+        read_count += last_read;
+        // A short read must end the call: continuing with the next buffer would
+        // leave a hole in this one, while callers account for the data as if it
+        // were laid out back to back.
+        if ((size_t)last_read < iov[n].iov_len)
+            break;
+    }
+    return read_count;
+}
+
+/// Record-level iterator layered over a page iterator: walks the records of the
+/// current page through a format-specific page reader and moves on to the next
+/// page when the current one is exhausted.
+template <class TVal, typename TBasePageIter>
+class TInputRecordIterator: public TBasePageIter {
+    typedef THolder<NMicroBDB::IBasePageReader<TVal>> TReaderHolder;
+
+public:
+    typedef TBasePageIter TPageIter;
+
+    TInputRecordIterator() {
+        Init();
+    }
+
+    ~TInputRecordIterator() {
+        Term();
+    }
+
+    /// Record returned by the last Next()/Skip()/GotoPage(), or nullptr.
+    const TVal* Current() const {
+        return Rec;
+    }
+
+    /// Fetches the ext-info of the current record; false when there is no current record.
+    bool GetExtInfo(typename TExtInfoType<TVal>::TResult* extInfo) const {
+        if (Rec == nullptr) {
+            return false;
+        }
+        return Reader->GetExtInfo(extInfo);
+    }
+
+    /// Raw ext-info of the current record; nullptr when there is no current record.
+    const ui8* GetExtInfoRaw(size_t* len) const {
+        if (Rec == nullptr) {
+            return nullptr;
+        }
+        return Reader->GetExtInfoRaw(len);
+    }
+
+    size_t GetRecSize() const {
+        return Reader->GetRecSize();
+    }
+
+    size_t GetExtSize() const {
+        return Reader->GetExtSize();
+    }
+
+    /// Advances to the next record, loading the next page when needed.
+    /// Returns nullptr at the end of input or when the page cannot be read.
+    const TVal* Next() {
+        if (RecNum) {
+            // more records remain on the current page
+            --RecNum;
+            Rec = Reader->Next();
+            return Rec;
+        }
+
+        TDatPage* nextPage = TPageIter::Next();
+        if (!nextPage) {
+            if (TPageIter::IsFrozen() && Reader.Get()) {
+                Reader->SetClearFlag();
+            }
+            Rec = nullptr;
+            return Rec;
+        }
+        if (SelectReader() != 0) {
+            Rec = nullptr;
+            return Rec;
+        }
+        RecNum = TPageIter::Current()->RecNum - 1;
+        Rec = Reader->Next();
+        return Rec;
+    }
+
+    // Skip(0) == Current(); Skip(1) == Next()
+    const TVal* Skip(int& num) {
+        // Y_ASSERT(num >= 0); ? otherwise it gets into infinite loop
+        // First jump over whole pages while the target lies beyond the current one.
+        while (num > RecNum) {
+            num -= RecNum + 1;
+            const bool pageReady = TPageIter::Next() && SelectReader() == 0;
+            if (!pageReady) {
+                RecNum = 0;
+                Rec = nullptr;
+                return Rec;
+            }
+            RecNum = TPageIter::Current()->RecNum - 1;
+            Rec = Reader->Next();
+        }
+        // Then step record by record inside the page.
+        for (; num; --num) {
+            Next();
+        }
+        return Rec;
+    }
+
+    // begin reading from next page
+    void Reset() {
+        Rec = nullptr;
+        RecNum = 0;
+        if (Reader.Get()) {
+            Reader->Reset();
+        }
+    }
+
+protected:
+    int Init() {
+        Rec = nullptr;
+        RecNum = 0;
+        Format = MBDB_FORMAT_NULL;
+        return 0;
+    }
+
+    /// Drops the page reader and returns to the initial state.
+    int Term() {
+        Reader.Destroy();
+        Format = MBDB_FORMAT_NULL;
+        Rec = nullptr;
+        RecNum = 0;
+        return 0;
+    }
+
+    /// Positions on the first record of page `pageno`.
+    const TVal* GotoPage(int pageno) {
+        const bool pageReady = TPageIter::GotoPage(pageno) && SelectReader() == 0;
+        if (!pageReady) {
+            Rec = nullptr;
+            return Rec;
+        }
+        RecNum = TPageIter::Current()->RecNum - 1;
+        Rec = Reader->Next();
+        return Rec;
+    }
+
+    /// Makes sure Reader matches the format of the current page: a new reader is
+    /// created when the format changes, otherwise the existing one is reset.
+    /// Returns 0, MBDB_UNEXPECTED_EOF (no current page) or MBDB_NOT_SUPPORTED.
+    int SelectReader() {
+        if (!TPageIter::Current()) {
+            return MBDB_UNEXPECTED_EOF;
+        }
+
+        const ui32 pageFormat = TPageIter::Current()->Format;
+        if (ui32(Format) == pageFormat) {
+            Y_ASSERT(Reader.Get() != nullptr);
+            Reader->Reset();
+            return 0;
+        }
+
+        switch (pageFormat) {
+            case MBDB_FORMAT_RAW:
+                Reader.Reset(new NMicroBDB::TRawPageReader<TVal, TPageIter>(this));
+                break;
+            case MBDB_FORMAT_COMPRESSED:
+                Reader.Reset(new NMicroBDB::TCompressedReader<TVal, TPageIter>(this));
+                break;
+            default:
+                return MBDB_NOT_SUPPORTED;
+        }
+        Format = EPageFormat(pageFormat);
+        return 0;
+    }
+
+    const TVal* Rec;
+    TReaderHolder Reader;
+    int RecNum; //!< number of records on the current page after the current record
+    EPageFormat Format;
+};
+
+// Page iterator over TBaseReader: pages are read into a buffer of Bufpages
+// pages that is indexed modulo Bufpages, and handed out one at a time.
+template <class TBaseReader>
+class TInputPageIterator: public TBaseReader {
+public:
+ typedef TBaseReader TReader;
+
+ TInputPageIterator()
+ : Buf(nullptr)
+ {
+ Term();
+ }
+
+ ~TInputPageIterator() {
+ Term();
+ }
+
+ TDatPage* Current() {
+ return CurPage;
+ }
+
+ // Remembers the current page number (0 if nothing has been read yet) in Frozen
+ // and returns it. While frozen, ReadBuf() reads no further than
+ // Frozen + Bufpages - 1.
+ int Freeze() {
+ return (Frozen = (PageNum == -1) ? 0 : PageNum);
+ }
+
+ void Unfreeze() {
+ Frozen = -1;
+ }
+
+ // 0 when not frozen (Frozen == -1), otherwise the frozen page number plus one.
+ inline int IsFrozen() const {
+ return Frozen + 1;
+ }
+
+ inline size_t GetPageSize() const {
+ return TReader::GetPageSize();
+ }
+
+ inline int GetPageNum() const {
+ return PageNum;
+ }
+
+ inline int IsEof() const {
+ return Eof;
+ }
+
+ // Moves to the next page, refilling the buffer when all buffered pages have
+ // been consumed. Returns nullptr when no more pages could be read.
+ TDatPage* Next() {
+ if (PageNum >= Maxpage && ReadBuf()) {
+ Eof = Eof ? Eof : TReader::IsEof();
+ return CurPage = nullptr;
+ }
+ return CurPage = (TDatPage*)(Buf + ((++PageNum) % Bufpages) * GetPageSize());
+ }
+
+ // Positions on page `pageno`. A page from the last batch read is served from
+ // the buffer; otherwise the reader is repositioned, which is refused while frozen.
+ TDatPage* GotoPage(int pageno) {
+ if (pageno <= Maxpage && pageno >= (Maxpage - Pages + 1)) {
+ PageNum = pageno;
+ return CurPage = (TDatPage*)(Buf + (PageNum % Bufpages) * GetPageSize());
+ }
+ if (IsFrozen() || TReader::GotoPage(pageno))
+ return nullptr;
+ Maxpage = PageNum = pageno - 1;
+ Eof = 0;
+ return Next();
+ }
+
+protected:
+ // Allocates the page buffer.
+ // pagesOrBytes == -1: buffer as many pages as TReader::GetLastPage() reports;
+ // pagesOrBytes != 0: `pages` is a number of pages;
+ // pagesOrBytes == 0: `pages` is a size in bytes.
+ // The buffer is capped by the reader's page count (when known) and is at least
+ // 2 pages. Returns 0 on success (also when GetLastPage() is 0) or ENOMEM.
+ int Init(size_t pages, int pagesOrBytes) {
+ Term();
+ if (pagesOrBytes == -1)
+ Bufpages = TReader::GetLastPage();
+ else if (pagesOrBytes)
+ Bufpages = pages;
+ else
+ Bufpages = pages / GetPageSize();
+ if (!TReader::GetLastPage()) {
+ Bufpages = 0;
+ assert(Eof == 1);
+ return 0;
+ }
+ int lastPage = TReader::GetLastPage();
+ if (lastPage >= 0)
+ Bufpages = (int)Min(lastPage, Bufpages);
+ Bufpages = Max(2, Bufpages);
+ Eof = 0;
+ ABuf.Alloc(Bufpages * GetPageSize());
+ return (Buf = ABuf.Begin()) ? 0 : ENOMEM;
+ // return (Buf = (char*)malloc(Bufpages * GetPageSize())) ? 0 : ENOMEM;
+ }
+
+ // Releases the buffer and resets all counters to the "nothing read" state.
+ int Term() {
+ // free(Buf);
+ ABuf.Dealloc();
+ Buf = nullptr;
+ Maxpage = PageNum = Frozen = -1;
+ Bufpages = 0;
+ Pages = 0;
+ Eof = 1;
+ CurPage = nullptr;
+ return 0;
+ }
+
+ // Reads the next batch of pages into the buffer slots following Maxpage.
+ // Because slots are indexed modulo Bufpages the target range may wrap around,
+ // in which case it is described by two iovecs.
+ // Returns 0 if at least one page was read, EAGAIN if a freeze leaves no slot
+ // to read into, and a non-zero value if nothing was read.
+ int ReadBuf() {
+ int nvec;
+ iovec vec[2];
+ int maxpage = (Frozen == -1 ? Maxpage + 1 : Frozen) + Bufpages - 1;
+ int minpage = Maxpage + 1;
+ if (maxpage < minpage)
+ return EAGAIN;
+ minpage %= Bufpages;
+ maxpage %= Bufpages;
+ if (maxpage < minpage) {
+ vec[0].iov_base = Buf + GetPageSize() * minpage;
+ vec[0].iov_len = GetPageSize() * (Bufpages - minpage);
+ vec[1].iov_base = Buf;
+ vec[1].iov_len = GetPageSize() * (maxpage + 1);
+ nvec = 2;
+ } else {
+ vec[0].iov_base = Buf + GetPageSize() * minpage;
+ vec[0].iov_len = GetPageSize() * (maxpage - minpage + 1);
+ nvec = 1;
+ }
+ TReader::ReadPages(vec, nvec, &Pages);
+ Maxpage += Pages;
+ return !Pages;
+ }
+
+ // Maxpage - number of the last page read into the buffer; PageNum - current page;
+ // Frozen - frozen page number or -1; Bufpages - buffer capacity in pages;
+ // Pages - number of pages obtained by the last ReadBuf().
+ int Maxpage, PageNum, Frozen, Bufpages, Eof, Pages;
+ TDatPage* CurPage;
+ // TMappedArray<char> ABuf;
+ TMappedAllocation ABuf;
+ char* Buf;
+};
+
+// Variant of the page iterator that fills two buffers from a separate reader
+// thread: while the consumer walks the pages of one buffer, the thread started
+// in Init() reads the next batch into the other one.
+// QEvent[i] asks the thread to fill buffer i; AEvent[i] reports buffer i as filled.
+template <class TBaseReader>
+class TInputPageIteratorMT: public TBaseReader {
+public:
+ typedef TBaseReader TReader;
+
+ TInputPageIteratorMT()
+ : CurBuf(0)
+ , CurReadBuf(0)
+ , Buf(nullptr)
+ {
+ Term();
+ }
+
+ ~TInputPageIteratorMT() {
+ Term();
+ }
+
+ TDatPage* Current() {
+ return CurPage;
+ }
+
+ // Remembers the current page number (0 if nothing has been read yet) in Frozen.
+ // NOTE(review): Frozen is also read by the reader thread in ReadCurBuf() with no
+ // synchronization visible here - confirm this is safe.
+ int Freeze() {
+ return (Frozen = (PageNum == -1) ? 0 : PageNum);
+ }
+
+ void Unfreeze() {
+ Frozen = -1;
+ }
+
+ // 0 when not frozen (Frozen == -1), otherwise the frozen page number plus one.
+ inline int IsFrozen() const {
+ return Frozen + 1;
+ }
+
+ inline size_t GetPageSize() const {
+ return TReader::GetPageSize();
+ }
+
+ inline int GetPageNum() const {
+ return PageNum;
+ }
+
+ inline int IsEof() const {
+ return Eof;
+ }
+
+ // Moves to the next page; when the current buffer is exhausted, switches to the
+ // buffer prepared by the reader thread. Returns nullptr at the end of input.
+ TDatPage* Next() {
+ if (Eof)
+ return CurPage = nullptr;
+ if (PageNum >= Maxpage && ReadBuf()) {
+ Eof = Eof ? Eof : TReader::IsEof();
+ return CurPage = nullptr;
+ }
+ return CurPage = (TDatPage*)(Buf + ((++PageNum) % Bufpages) * GetPageSize());
+ }
+
+ // Positions on page `pageno`: served from the current buffer when the page
+ // belongs to the last batch, otherwise the reader is repositioned (refused
+ // while frozen).
+ // NOTE(review): repositioning does not touch MaxpageR or the reader thread's
+ // state - confirm GotoPage outside the buffer is supported by this class.
+ TDatPage* GotoPage(int pageno) {
+ if (pageno <= Maxpage && pageno >= (Maxpage - Pages + 1)) {
+ PageNum = pageno;
+ return CurPage = (TDatPage*)(Buf + (PageNum % Bufpages) * GetPageSize());
+ }
+ if (IsFrozen() || TReader::GotoPage(pageno))
+ return nullptr;
+ Maxpage = PageNum = pageno - 1;
+ Eof = 0;
+ return Next();
+ }
+
+ // Body of the reader thread: waits for a request on QEvent[CurBuf], fills that
+ // buffer, publishes the page count in PagesM and signals AEvent[CurBuf], then
+ // switches to the other buffer. Exits when Finish is set or nothing was read.
+ void ReadPages() {
+ // fprintf(stderr, "ReadPages started\n");
+ bool eof = false;
+ while (!eof) {
+ QEvent[CurBuf].Wait();
+ if (Finish)
+ return;
+ int pages = ReadCurBuf(Bufs[CurBuf]);
+ PagesM[CurBuf] = pages;
+ eof = !pages;
+ AEvent[CurBuf].Signal();
+ CurBuf ^= 1;
+ }
+ }
+
+protected:
+ // Allocates two buffers of Bufpages pages each, starts the reader thread and
+ // requests the first buffer. The meaning of pages/pagesOrBytes:
+ // pagesOrBytes == -1: buffer as many pages as TReader::GetLastPage() reports;
+ // pagesOrBytes != 0: `pages` is a number of pages;
+ // pagesOrBytes == 0: `pages` is a size in bytes.
+ // Returns 0 on success (also when GetLastPage() is 0) or ENOMEM.
+ // NOTE(review): the thread is started before the allocation result is checked - confirm.
+ int Init(size_t pages, int pagesOrBytes) {
+ Term();
+ if (pagesOrBytes == -1)
+ Bufpages = TReader::GetLastPage();
+ else if (pagesOrBytes)
+ Bufpages = pages;
+ else
+ Bufpages = pages / GetPageSize();
+ if (!TReader::GetLastPage()) {
+ Bufpages = 0;
+ assert(Eof == 1);
+ return 0;
+ }
+ int lastPage = TReader::GetLastPage();
+ if (lastPage >= 0)
+ Bufpages = (int)Min(lastPage, Bufpages);
+ Bufpages = Max(2, Bufpages);
+ Eof = 0;
+ ABuf.Alloc(Bufpages * GetPageSize() * 2);
+ Bufs[0] = ABuf.Begin();
+ Bufs[1] = Bufs[0] + Bufpages * GetPageSize();
+ // return (Buf = (char*)malloc(Bufpages * GetPageSize())) ? 0 : ENOMEM;
+ Finish = false;
+ ReadThread = std::thread([this]() {
+ TThread::SetCurrentThreadName("DatReader");
+ ReadPages();
+ });
+ QEvent[0].Signal();
+ return Bufs[0] ? 0 : ENOMEM;
+ }
+
+ // Asks the reader thread to exit (both request events are signalled so that it
+ // wakes up whichever buffer it is waiting for) and joins it.
+ void StopThread() {
+ Finish = true;
+ QEvent[0].Signal();
+ QEvent[1].Signal();
+ ReadThread.join();
+ }
+
+ // Stops the reader thread if it is running, releases the buffers and resets
+ // all counters to the "nothing read" state.
+ int Term() {
+ // free(Buf);
+ if (ReadThread.joinable())
+ StopThread();
+ ABuf.Dealloc();
+ Buf = nullptr;
+ Bufs[0] = nullptr;
+ Bufs[1] = nullptr;
+ Maxpage = MaxpageR = PageNum = Frozen = -1;
+ Bufpages = 0;
+ Pages = 0;
+ Eof = 1;
+ CurPage = nullptr;
+ return 0;
+ }
+
+ // Runs on the reader thread: reads the next batch of pages into `buf`, into the
+ // slots following MaxpageR (slots are indexed modulo Bufpages, so the range may
+ // wrap and need two iovecs). Returns the number of pages read.
+ // NOTE(review): the early exit returns EAGAIN, which the caller stores as a
+ // page count - this looks like a leftover from the single-threaded ReadBuf(); confirm.
+ int ReadCurBuf(char* buf) {
+ int nvec;
+ iovec vec[2];
+ int maxpage = (Frozen == -1 ? MaxpageR + 1 : Frozen) + Bufpages - 1;
+ int minpage = MaxpageR + 1;
+ if (maxpage < minpage)
+ return EAGAIN;
+ minpage %= Bufpages;
+ maxpage %= Bufpages;
+ if (maxpage < minpage) {
+ vec[0].iov_base = buf + GetPageSize() * minpage;
+ vec[0].iov_len = GetPageSize() * (Bufpages - minpage);
+ vec[1].iov_base = buf;
+ vec[1].iov_len = GetPageSize() * (maxpage + 1);
+ nvec = 2;
+ } else {
+ vec[0].iov_base = buf + GetPageSize() * minpage;
+ vec[0].iov_len = GetPageSize() * (maxpage - minpage + 1);
+ nvec = 1;
+ }
+ int pages;
+ TReader::ReadPages(vec, nvec, &pages);
+ MaxpageR += pages;
+ return pages;
+ }
+
+ // Consumer side of the handshake: requests the other buffer to be filled,
+ // waits until the current one is ready and makes it the active buffer.
+ // Returns non-zero when the buffer came back with no pages.
+ int ReadBuf() {
+ QEvent[CurReadBuf ^ 1].Signal();
+ AEvent[CurReadBuf].Wait();
+ Buf = Bufs[CurReadBuf];
+ Maxpage += (Pages = PagesM[CurReadBuf]);
+ CurReadBuf ^= 1;
+ return !Pages;
+ }
+
+ // Maxpage - last page available to the consumer; MaxpageR - last page read by
+ // the reader thread; PageNum - current page; Frozen - frozen page number or -1;
+ // Bufpages - capacity of one buffer in pages; Pages - size of the last batch.
+ int Maxpage, MaxpageR, PageNum, Frozen, Bufpages, Eof, Pages;
+ TDatPage* CurPage;
+ // TMappedArray<char> ABuf;
+ ui32 CurBuf; // buffer the reader thread works on
+ ui32 CurReadBuf; // buffer the consumer takes next
+ TMappedAllocation ABuf;
+ char* Buf;
+ char* Bufs[2];
+ ui32 PagesM[2];
+ TAutoEvent QEvent[2];
+ TAutoEvent AEvent[2];
+ std::thread ReadThread;
+ bool Finish;
+};
+
+template <typename TFileManip>
+class TInputPageFileImpl: private TNonCopyable {
+protected:
+ TFileManip FileManip;
+
+public:
+ TInputPageFileImpl()
+ : Pagesize(0)
+ , Fd(-1)
+ , Eof(1)
+ , Error(0)
+ , Pagenum(0)
+ , Recordsig(0)
+ {
+ Term();
+ }
+
+ ~TInputPageFileImpl() {
+ Term();
+ }
+
+ inline int IsEof() const {
+ return Eof;
+ }
+
+ inline int GetError() const {
+ return Error;
+ }
+
+ inline size_t GetPageSize() const {
+ return Pagesize;
+ }
+
+ inline int GetLastPage() const {
+ return Pagenum;
+ }
+
+ inline ui32 GetRecordSig() const {
+ return Recordsig;
+ }
+
+ inline bool IsOpen() const {
+ return FileManip.IsOpen();
+ }
+
+protected:
+ int Init(const char* fname, ui32 recsig, ui32* gotrecsig = nullptr, bool direct = false) {
+ Error = FileManip.Open(fname, direct);
+ return Error ? Error : Init(TFile(), recsig, gotrecsig);
+ }
+
+ int Init(const TFile& file, ui32 recsig, ui32* gotrecsig = nullptr) {
+ if (!file.IsOpen() && !FileManip.IsOpen())
+ return MBDB_NOT_INITIALIZED;
+ if (file.IsOpen() && FileManip.IsOpen())
+ return MBDB_ALREADY_INITIALIZED;
+ if (file.IsOpen()) {
+ Error = FileManip.Init(file);
+ if (Error)
+ return Error;
+ }
+
+ // TArrayHolder<ui8> buf(new ui8[METASIZE + FS_BLOCK_SIZE]);
+ // ui8* ptr = (buf.Get() + FS_BLOCK_SIZE - ((ui64)buf.Get() & (FS_BLOCK_SIZE - 1)));
+ TMappedArray<ui8> buf;
+ buf.Create(METASIZE);
+ ui8* ptr = &buf[0];
+ TDatMetaPage* meta = (TDatMetaPage*)ptr;
+ ssize_t size = METASIZE;
+ ssize_t ret;
+ while (size && (ret = FileManip.Read(ptr, (unsigned)size)) > 0) {
+ Y_ASSERT(ret <= size);
+ size -= ret;
+ ptr += ret;
+ }
+ if (size) {
+ FileManip.Close();
+ return Error = MBDB_BAD_METAPAGE;
+ }
+ if (gotrecsig)
+ *gotrecsig = meta->RecordSig;
+ return Init(TFile(), meta, recsig);
+ }
+
+ int Init(TAutoPtr<IInputStream> input, ui32 recsig, ui32* gotrecsig = nullptr) {
+ if (!input && !FileManip.IsOpen())
+ return MBDB_NOT_INITIALIZED;
+ if (FileManip.IsOpen())
+ return MBDB_ALREADY_INITIALIZED;
+
+ Error = FileManip.Open(input);
+ if (Error)
+ return Error;
+
+ TArrayHolder<ui8> buf(new ui8[METASIZE]);
+ ui8* ptr = buf.Get();
+ ssize_t size = METASIZE;
+ ssize_t ret;
+ while (size && (ret = FileManip.Read(ptr, (unsigned)size)) > 0) {
+ Y_ASSERT(ret <= size);
+ size -= ret;
+ ptr += ret;
+ }
+ if (size) {
+ FileManip.Close();
+ return Error = MBDB_BAD_METAPAGE;
+ }
+ TDatMetaPage* meta = (TDatMetaPage*)buf.Get();
+ if (gotrecsig)
+ *gotrecsig = meta->RecordSig;
+ return Init(TFile(), meta, recsig);
+ }
+
+ int Init(const TFile& file, const TDatMetaPage* meta, ui32 recsig) {
+ if (!file.IsOpen() && !FileManip.IsOpen())
+ return MBDB_NOT_INITIALIZED;
+ if (file.IsOpen() && FileManip.IsOpen())
+ return MBDB_ALREADY_INITIALIZED;
+ if (file.IsOpen()) {
+ Error = FileManip.Init(file);
+ if (Error)
+ return Error;
+ }
+
+ if (meta->MetaSig != METASIG)
+ Error = MBDB_BAD_METAPAGE;
+ else if (meta->RecordSig != recsig)
+ Error = MBDB_BAD_RECORDSIG;
+
+ if (Error) {
+ FileManip.Close();
+ return Error;
+ }
+
+ i64 flength = FileManip.GetLength();
+ if (flength >= 0) {
+ i64 fsize = flength;
+ fsize -= METASIZE;
+ if (fsize % meta->PageSize)
+ return Error = MBDB_BAD_FILE_SIZE;
+ Pagenum = (int)(fsize / meta->PageSize);
+ } else {
+ Pagenum = -1;
+ }
+ Pagesize = meta->PageSize;
+ Recordsig = meta->RecordSig;
+ Error = Eof = 0;
+ return Error;
+ }
+
+ // Reads whole pages into the buffers described by vec[0..nvec).
+ //
+ // *pages receives the number of pages read (0 on early exit). Returns Error,
+ // which is 0 on success; note that it also returns 0 when Eof is already set
+ // and no error is pending. Buffers are filled through Readv in batches of at
+ // most 16 iovecs. A partially filled iovec is temporarily advanced by `delta`
+ // bytes so the next Readv continues where the previous one stopped; the
+ // original base/length are restored before returning.
+ int ReadPages(iovec* vec, int nvec, int* pages) {
+ *pages = 0;
+
+ if (Eof || Error)
+ return Error;
+
+ ssize_t size = 0, delta = 0, total = 0;
+ iovec* pvec = vec;
+ int vsize = nvec;
+
+ while (vsize && (size = Readv(FileManip, pvec, (int)Min(vsize, 16))) > 0) {
+ total += size;
+ // Undo the temporary adjustment of the current iovec and count the
+ // bytes it had already received as part of this round.
+ if (delta) {
+ size += delta;
+ pvec->iov_len += delta;
+ pvec->iov_base = (char*)pvec->iov_base - delta;
+ delta = 0;
+ }
+ // Skip every iovec that is now completely filled; remember a partial one.
+ while (size) {
+ if ((size_t)size >= pvec->iov_len) {
+ size -= pvec->iov_len;
+ ++pvec;
+ --vsize;
+ } else {
+ delta = size;
+ pvec->iov_len -= size;
+ pvec->iov_base = (char*)pvec->iov_base + size;
+ size = 0;
+ }
+ }
+ }
+ // Restore the last partially filled iovec to its original extent.
+ if (delta) {
+ pvec->iov_len += delta;
+ pvec->iov_base = (char*)pvec->iov_base - delta;
+ }
+ if (size < 0)
+ return Error = errno ? errno : MBDB_READ_ERROR;
+ // Only whole pages are acceptable.
+ if (total % Pagesize)
+ return Error = MBDB_BAD_FILE_SIZE;
+ // Some buffers were left unfilled: the input is exhausted.
+ if (vsize)
+ Eof = 1;
+ *pages = total / Pagesize; // it would be better to assign it after the for-loops
+ // Verify the signature of every page that was read; `size` ends up as the
+ // number of bytes checked in the current iovec.
+ for (; total; ++vec, total -= size)
+ for (size = 0; size < total && (size_t)size < vec->iov_len; size += Pagesize)
+ if (((TDatPage*)((char*)vec->iov_base + size))->PageSig != PAGESIG)
+ return Error = MBDB_BAD_PAGESIG;
+ return Error;
+ }
+
+    // Positions the underlying file at the start of the given page.
+    // Does nothing when an error is already pending. Clears Eof, and sets
+    // Error to MBDB_BAD_FILE_SIZE when the seek does not land on the
+    // requested offset. Returns Error.
+    int GotoPage(int page) {
+        if (!Error) {
+            Eof = 0;
+            const i64 target = (i64)page * Pagesize + METASIZE;
+            const auto landed = FileManip.Seek(target, SEEK_SET);
+            if (target != landed)
+                Error = MBDB_BAD_FILE_SIZE;
+        }
+        return Error;
+    }
+
+ // Closes the underlying file manipulator and returns its result.
+ // Error, Eof and the page bookkeeping are left untouched.
+ int Term() {
+ return FileManip.Close();
+ }
+
+ size_t Pagesize;
+ int Fd;
+ int Eof;
+ int Error;
+ int Pagenum; //!< number of pages in this file
+ ui32 Recordsig;
+};
+
+// Page iterator over a reader that exposes its whole content in memory
+// through GetData()/GetSize(). Pages are addressed directly inside that
+// region; nothing is copied.
+template <class TBaseReader>
+class TMappedInputPageIterator: public TBaseReader {
+public:
+    typedef TBaseReader TReader;
+
+    TMappedInputPageIterator() {
+        Term();
+    }
+
+    ~TMappedInputPageIterator() {
+        Term();
+    }
+
+    // Page selected by the last Next()/GotoPage(), or nullptr.
+    TDatPage* Current() {
+        return CurPage;
+    }
+
+    inline size_t GetPageSize() const {
+        return TReader::GetPageSize();
+    }
+
+    // Index of the current page; -1 before the first Next().
+    inline int GetPageNum() const {
+        return PageNum;
+    }
+
+    inline int IsEof() const {
+        return Eof;
+    }
+
+    // This iterator never freezes pages.
+    inline int IsFrozen() const {
+        return 0;
+    }
+
+    // Advances to the following page. When its offset falls outside the data
+    // region, Eof is raised and nullptr is returned.
+    TDatPage* Next() {
+        ++PageNum;
+        const i64 pos = (i64)PageNum * GetPageSize() + METASIZE;
+        const bool inside = pos >= 0 && pos < (i64)TReader::GetSize();
+        if (!inside) {
+            Eof = 1;
+            CurPage = nullptr;
+            return CurPage;
+        }
+        CurPage = (TDatPage*)((char*)TReader::GetData() + pos);
+        return CurPage;
+    }
+
+protected:
+    // Resets the iterator; both arguments are ignored.
+    int Init(size_t /*pages*/, int /*pagesOrBytes*/) {
+        Term();
+        Eof = 0;
+        return 0;
+    }
+
+    // Puts the iterator into the "no page, at end" state.
+    int Term() {
+        CurPage = nullptr;
+        Eof = 1;
+        PageNum = -1;
+        return 0;
+    }
+
+    // Jumps to page `pageno` by stepping onto it from the page before.
+    TDatPage* GotoPage(int pageno) {
+        Eof = 0;
+        PageNum = pageno - 1;
+        return Next();
+    }
+
+    int PageNum, Eof, Pages, Pagenum;
+    TDatPage* CurPage;
+};
+
+using TInputPageFile = TInputPageFileImpl<TInputFileManip>;
+
+// Input dat-file facade: combines a reader, a page iterator and a record
+// iterator (all supplied through TBaseRecIter) behind Open()/Close().
+template <class TVal,
+          typename TBaseRecIter = TInputRecordIterator<TVal, TInputPageIterator<TInputPageFile>>>
+class TInDatFileImpl: public TBaseRecIter {
+public:
+    typedef TBaseRecIter TRecIter;
+    typedef typename TRecIter::TPageIter TPageIter;
+    typedef typename TRecIter::TPageIter::TReader TReader;
+    using TRecIter::GotoPage;
+
+    // Each Open() overload initialises the reader from a different source and,
+    // on success, sets up the page and record iterators via Open2().
+    // All of them return 0 on success or the first error code encountered.
+
+    int Open(const char* fname, size_t pages = 1, int pagesOrBytes = 1, ui32* gotRecordSig = nullptr, bool direct = false) {
+        const int err = TReader::Init(fname, TVal::RecordSig, gotRecordSig, direct);
+        if (err)
+            return err;
+        return Open2(pages, pagesOrBytes);
+    }
+
+    int Open(const TFile& file, size_t pages = 1, int pagesOrBytes = 1, ui32* gotRecordSig = nullptr) {
+        const int err = TReader::Init(file, TVal::RecordSig, gotRecordSig);
+        if (err)
+            return err;
+        return Open2(pages, pagesOrBytes);
+    }
+
+    int Open(TAutoPtr<IInputStream> input, size_t pages = 1, int pagesOrBytes = 1, ui32* gotRecordSig = nullptr) {
+        const int err = TReader::Init(input, TVal::RecordSig, gotRecordSig);
+        if (err)
+            return err;
+        return Open2(pages, pagesOrBytes);
+    }
+
+    int Open(const TFile& file, const TDatMetaPage* meta, size_t pages = 1, int pagesOrBytes = 1) {
+        const int err = TReader::Init(file, meta, TVal::RecordSig);
+        if (err)
+            return err;
+        return Open2(pages, pagesOrBytes);
+    }
+
+    // Shuts down all three layers (always, in record/page/reader order) and
+    // returns the first non-zero result.
+    int Close() {
+        const int recRet = TRecIter::Term();
+        const int pageRet = TPageIter::Term();
+        const int readerRet = TReader::Term();
+        if (recRet)
+            return recRet;
+        if (pageRet)
+            return pageRet;
+        return readerRet;
+    }
+
+    // Moves to the page with index GetLastPage() - 1; returns nullptr when
+    // GetLastPage() is not positive.
+    const TVal* GotoLastPage() {
+        if (TReader::GetLastPage() <= 0)
+            return nullptr;
+        return TRecIter::GotoPage(TReader::GetLastPage() - 1);
+    }
+
+private:
+    // Second stage of Open(): iterator set-up; closes everything on failure.
+    int Open2(size_t pages, int pagesOrBytes) {
+        int err = TPageIter::Init(pages, pagesOrBytes);
+        if (err == 0)
+            err = TRecIter::Init();
+        if (err != 0)
+            Close();
+        return err;
+    }
+};
+
+// Reader for an index file of TVal keys. The first page of the file is
+// copied into Index0 on Open() and is used as the top level of a two-step
+// lookup: first locate a page through Index0, then search inside that page.
+template <class TVal>
+class TInIndexFile: protected TInDatFileImpl<TVal> {
+    typedef TInDatFileImpl<TVal> TDatFile;
+    typedef typename TDatFile::TRecIter TRecIter;
+    typedef typename TRecIter::TPageIter TPageIter;
+    typedef typename TExtInfoType<TVal>::TResult TExtInfo;
+
+public:
+    using TDatFile::IsOpen;
+
+    TInIndexFile()
+        : Index0(nullptr)
+        , RecsOnPage(0)
+    {
+    }
+
+    // Releases the cached first page even when Close() was never called.
+    ~TInIndexFile() {
+        free(Index0);
+        Index0 = nullptr;
+    }
+
+    // Opens the index file and caches its first page.
+    // Returns 0 on success, the error from the underlying Open(), or
+    // MBDB_NO_MEMORY when the page copy cannot be allocated.
+    int Open(const char* fname, size_t pages = 2, int pagesOrBytes = 1, ui32* gotRecordSig = nullptr) {
+        int ret = TDatFile::Open(fname, pages, pagesOrBytes, gotRecordSig);
+        if (ret)
+            return ret;
+        if (!(Index0 = (TDatPage*)malloc(TPageIter::GetPageSize()))) {
+            TDatFile::Close();
+            return MBDB_NO_MEMORY;
+        }
+        // Fixed-size records without ext info: records per page is a constant.
+        if (!TExtInfoType<TVal>::Exists && SizeOf((TVal*)nullptr))
+            RecsOnPage = (TPageIter::GetPageSize() - sizeof(TDatPage)) / DatCeil(SizeOf((TVal*)nullptr));
+        TDatFile::Next();
+        if (TDatPage* first = TPageIter::Current()) {
+            memcpy(Index0, first, TPageIter::GetPageSize());
+        } else {
+            // No first page is available (presumably an index without pages):
+            // keep an all-zero page so later lookups see RecNum == 0 instead of
+            // copying from a null pointer.
+            memset(Index0, 0, TPageIter::GetPageSize());
+        }
+        return 0;
+    }
+
+    int Close() {
+        free(Index0);
+        Index0 = nullptr;
+        return TDatFile::Close();
+    }
+
+    inline int GetError() const {
+        return TDatFile::GetError();
+    }
+
+    // Returns the number computed by the two-level lookup for akey
+    // (page-local index plus the records on preceding pages), or 0 when the
+    // target page cannot be reached. Variable-size keys and keys with ext
+    // info are handled by FindVszKey().
+    int FindKey(const TVal* akey, const TExtInfo* extInfo = nullptr) {
+        assert(IsOpen());
+        if (TExtInfoType<TVal>::Exists || !SizeOf((TVal*)nullptr))
+            return FindVszKey(akey, extInfo);
+        int num = FindKeyOnPage(Index0, akey);
+        TDatPage* page = TPageIter::GotoPage(num + 1);
+        if (!page)
+            return 0;
+        num = FindKeyOnPage(page, akey);
+        num += (TPageIter::GetPageNum() - 1) * RecsOnPage;
+        return num;
+    }
+
+    // Variable-size variant of FindKey(): record counts of the preceding
+    // pages are summed by visiting each of them.
+    int FindVszKey(const TVal* akey, const TExtInfo* extInfo = NULL) {
+        int num = FindVszKeyOnPage(Index0, akey, extInfo);
+        int num_add = 0;
+        for (int p = 0; p < num; p++) {
+            TDatPage* page = TPageIter::GotoPage(p + 1);
+            if (!page)
+                return 0;
+            num_add += page->RecNum;
+        }
+        TDatPage* page = TPageIter::GotoPage(num + 1);
+        if (!page)
+            return 0;
+        num = FindVszKeyOnPage(page, akey, extInfo);
+        num += num_add;
+        return num;
+    }
+
+protected:
+    // Binary search among fixed-size records of a page. Returns the index of
+    // the last record that compares less than *key, or 0 when there is none.
+    int FindKeyOnPage(TDatPage* page, const TVal* key) {
+        int left = 0;
+        int right = page->RecNum - 1;
+        int recsize = DatCeil(SizeOf((TVal*)nullptr));
+        while (left < right) {
+            int middle = (left + right) >> 1;
+            if (*((TVal*)((char*)page + sizeof(TDatPage) + middle * recsize)) < *key)
+                left = middle + 1;
+            else
+                right = middle;
+        }
+        //borders check (left and right)
+        return (left == 0 || *((TVal*)((char*)page + sizeof(TDatPage) + left * recsize)) < *key) ? left : left - 1;
+    }
+
+    // Strict "less" on (key, extInfo) pairs.
+    // rawExtInfoA is deserialized into extInfoA only if necessary, i.e. when
+    // the keys compare equal and extInfoB is given.
+    inline bool KeyLess_(const TVal* a, const TVal* b,
+                         TExtInfo* extInfoA, const TExtInfo* extInfoB,
+                         const ui8* rawExtInfoA, size_t rawLen) {
+        if (*a < *b) {
+            return true;
+        } else if (!extInfoB || *b < *a) {
+            return false;
+        } else {
+            // *a == *b && extInfoB
+            Y_PROTOBUF_SUPPRESS_NODISCARD extInfoA->ParseFromArray(rawExtInfoA, rawLen);
+            return (*extInfoA < *extInfoB);
+        }
+    }
+
+    // Linear scan over variable-size records of a page. Returns the index of
+    // the last record that compares less than the key, or 0 when there is none.
+    int FindVszKeyOnPage(TDatPage* page, const TVal* key, const TExtInfo* extInfo) {
+        TVal* cur = (TVal*)((char*)page + sizeof(TDatPage));
+        ui32 recnum = page->RecNum;
+        if (!TExtInfoType<TVal>::Exists) {
+            for (; recnum > 0 && *cur < *key; --recnum)
+                cur = (TVal*)((char*)cur + DatCeil(SizeOf(cur)));
+        } else {
+            TExtInfo ei;
+            // Sizes are measured only for records that are known to exist, so
+            // nothing is read past the last record of the page.
+            while (recnum > 0) {
+                size_t ll;
+                size_t l;
+                const size_t sz = NMicroBDB::SizeOfExt(cur, &ll, &l);
+                if (!KeyLess_(cur, key, &ei, extInfo, (ui8*)cur + sz + ll, l))
+                    break;
+                cur = (TVal*)((ui8*)cur + DatCeil(sz + ll + l));
+                --recnum;
+            }
+        }
+
+        int idx = page->RecNum - recnum - 1;
+        return (idx >= 0) ? idx : 0;
+    }
+
+    TDatPage* Index0; // malloc'ed copy of the first index page, or nullptr
+    int RecsOnPage;   // records per page for fixed-size keys; 0 otherwise
+};
+
+// Input dat file of TVal records that additionally owns an index-file reader
+// for TKey keys. The mixin only holds KeyFile; opening and using it is left
+// to the derived class.
+template <class TVal, class TKey, class TPageIterator = TInputPageIterator<TInputPageFile>>
+class TKeyFileMixin: public TInDatFileImpl<TVal, TInputRecordIterator<TVal, TPageIterator>> {
+protected:
+ TInIndexFile<TKey> KeyFile;
+};
+
+// Dat file with keyed access: a data file of TVal records plus an index file
+// of TKey keys (KeyFile, provided by TBase). Open()/Close() report failures
+// by throwing yexception.
+template <class TVal, class TKey, class TBase = TKeyFileMixin<TVal, TKey>>
+class TDirectInDatFile: public TBase {
+    typedef TBase TDatFile;
+    typedef typename TDatFile::TRecIter TRecIter;
+    typedef typename TDatFile::TPageIter TPageIter;
+
+public:
+    // Opens the data file at `path` and the index file whose name
+    // DatNameToIdx() derives from it. Throws yexception on any failure; the
+    // data file is closed again when the index part fails, so the object is
+    // left closed and can be re-opened.
+    void Open(const char* path, size_t pages = 1, size_t keypages = 1, int pagesOrBytes = 1) {
+        int ret;
+        ui32 gotRecordSig = 0;
+
+        ret = TDatFile::Open(path, pages, pagesOrBytes, &gotRecordSig);
+        if (ret) {
+            ythrow yexception() << ErrorMessage(ret, "Failed to open input file", path, TVal::RecordSig, gotRecordSig);
+        }
+        char KeyName[PATH_MAX + 1];
+        if (DatNameToIdx(KeyName, path)) {
+            TDatFile::Close();
+            ythrow yexception() << ErrorMessage(MBDB_BAD_FILENAME, "Failed to open input file", path);
+        }
+        gotRecordSig = 0;
+        ret = KeyFile.Open(KeyName, keypages, 1, &gotRecordSig);
+        if (ret) {
+            TDatFile::Close();
+            ythrow yexception() << ErrorMessage(ret, "Failed to open input keyfile", KeyName, TKey::RecordSig, gotRecordSig);
+        }
+    }
+
+    // Closes both files, always, and then reports the first problem seen.
+    // Nothing is thrown while another exception is already propagating.
+    void Close() {
+        int firstError = 0;
+        const char* firstWhat = nullptr;
+        auto remember = [&firstError, &firstWhat](int code, const char* what) {
+            if (code && !firstError) {
+                firstError = code;
+                firstWhat = what;
+            }
+        };
+
+        if (TDatFile::IsOpen())
+            remember(TDatFile::GetError(), "Error before closing input file");
+        remember(TDatFile::Close(), "Error while closing input file");
+
+        if (KeyFile.IsOpen())
+            remember(KeyFile.GetError(), "Error before closing input keyfile");
+        remember(KeyFile.Close(), "Error while closing input keyfile");
+
+        if (firstError && !std::uncaught_exception())
+            ythrow yexception() << ErrorMessage(firstError, firstWhat);
+    }
+
+    // Returns the first record whose key (and, when extInfo is given and TVal
+    // carries ext info, whose key ext info) is not less than the requested
+    // one, or nullptr when the scan runs off the end. The scan starts at the
+    // page suggested by the index file.
+    const TVal* FindRecord(const TKey* key, const typename TExtInfoType<TKey>::TResult* extInfo = nullptr) {
+        int page = KeyFile.FindKey(key, extInfo);
+        const TVal* val = TRecIter::GotoPage(page);
+        if (!TExtInfoType<TVal>::Exists || !extInfo) {
+            TKey k;
+            while (val) {
+                TMakeExtKey<TVal, TKey>::Make(&k, nullptr, val, nullptr);
+                if (!(k < *key))
+                    break;
+                val = TRecIter::Next();
+            }
+        } else {
+            typename TExtInfoType<TVal>::TResult valExt;
+            TKey k;
+            typename TExtInfoType<TKey>::TResult kExt;
+            while (val) {
+                TRecIter::GetExtInfo(&valExt);
+                TMakeExtKey<TVal, TKey>::Make(&k, &kExt, val, &valExt);
+                // k > *key || (k == *key && kExt >= *extInfo)
+                if (*key < k || (!(k < *key) && !(kExt < *extInfo)))
+                    break;
+                val = TRecIter::Next();
+            }
+        }
+        return val;
+    }
+
+    // Returns what KeyFile.FindKey() yields for the key. The ext info is the
+    // key's ext info, matching the parameter type FindKey() expects.
+    int FindPagesNo(const TKey* key, const typename TExtInfoType<TKey>::TResult* extInfo = NULL) {
+        return KeyFile.FindKey(key, extInfo);
+    }
+
+protected:
+    using TBase::KeyFile;
+};
diff --git a/library/cpp/microbdb/microbdb.cpp b/library/cpp/microbdb/microbdb.cpp
new file mode 100644
index 0000000000..c10dbdf126
--- /dev/null
+++ b/library/cpp/microbdb/microbdb.cpp
@@ -0,0 +1 @@
+#include "microbdb.h"
diff --git a/library/cpp/microbdb/microbdb.h b/library/cpp/microbdb/microbdb.h
new file mode 100644
index 0000000000..7521887337
--- /dev/null
+++ b/library/cpp/microbdb/microbdb.h
@@ -0,0 +1,54 @@
+#pragma once
+
+#include <util/folder/dirut.h>
+
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable : 4706) /*assignment within conditional expression*/
+#pragma warning(disable : 4267) /*conversion from 'size_t' to 'type', possible loss of data*/
+#endif
+
+#include "align.h"
+#include "extinfo.h"
+#include "header.h"
+#include "reader.h"
+#include "heap.h"
+#include "file.h"
+#include "sorter.h"
+#include "input.h"
+#include "output.h"
+#include "sorterdef.h"
+
+// Creates a temporary directory via MakeTempDir() and appends "%06d" to its
+// path. Returns 0 on success. On failure returns the error from
+// MakeTempDir(), or EINVAL when the resulting string is longer than
+// FILENAME_MAX - 100, and sets path to the empty string.
+inline int MakeSorterTempl(char path[/*FILENAME_MAX*/], const char* prefix) {
+    int status = MakeTempDir(path, prefix);
+    if (status == 0) {
+        const auto needed = strlcat(path, "%06d", FILENAME_MAX);
+        if (needed > FILENAME_MAX - 100)
+            status = EINVAL;
+    }
+    if (status != 0)
+        path[0] = 0;
+    return status;
+}
+
+// Reads the meta page from the current position of `file` into *meta.
+// Returns 0 on success, MBDB_BAD_FILE_SIZE when fewer than METASIZE bytes
+// could be read, and MBDB_BAD_METAPAGE when the meta signature is wrong
+// (*meta is filled in that case as well).
+inline int GetMeta(TFile& file, TDatMetaPage* meta) {
+    ui8 buf[METASIZE], *ptr = buf;
+    ssize_t size = sizeof(buf), ret;
+    while (size && (ret = file.Read(ptr, size)) > 0) {
+        size -= ret;
+        ptr += ret;
+    }
+    if (size)
+        return MBDB_BAD_FILE_SIZE;
+    // Copy bytewise instead of dereferencing a cast pointer: buf is a plain
+    // byte array with no alignment guarantee for TDatMetaPage, and memcpy
+    // avoids the aliasing problem the old code worked around. As before,
+    // this assumes sizeof(TDatMetaPage) <= METASIZE.
+    memcpy(meta, buf, sizeof(*meta));
+    return (meta->MetaSig == METASIG) ? 0 : MBDB_BAD_METAPAGE;
+}
+
+// Tells whether `fname` starts with a valid meta page whose record signature
+// equals TRec::RecordSig. The file is opened read-only.
+template <class TRec>
+inline bool IsDatFile(const char* fname) {
+    TDatMetaPage header;
+    TFile input(fname, RdOnly);
+    if (GetMeta(input, &header))
+        return false;
+    return header.RecordSig == TRec::RecordSig;
+}
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
diff --git a/library/cpp/microbdb/noextinfo.proto b/library/cpp/microbdb/noextinfo.proto
new file mode 100644
index 0000000000..6a78882e07
--- /dev/null
+++ b/library/cpp/microbdb/noextinfo.proto
@@ -0,0 +1,4 @@
+
+// Message without any fields.
+// NOTE(review): presumably the ext-info type for records that carry no
+// extended info - confirm against extinfo.h.
+message TNoExtInfo {
+}
+
diff --git a/library/cpp/microbdb/output.h b/library/cpp/microbdb/output.h
new file mode 100644
index 0000000000..d0ecab2108
--- /dev/null
+++ b/library/cpp/microbdb/output.h
@@ -0,0 +1,1049 @@
+#pragma once
+
+#include "header.h"
+#include "file.h"
+
+#include <util/generic/buffer.h>
+#include <util/memory/tempbuf.h>
+
+#include <sys/uio.h>
+
+// Gather-write emulation on top of TFileManip::Write().
+//
+// Writes the buffers iov[0..iovcnt) in order and returns the total number of
+// bytes written, or -1 as soon as a Write() call reports a negative result.
+// The function stops at the first short write, so the bytes accepted so far
+// always form a prefix of the concatenated buffers and the caller can resume
+// from the returned count.
+template <class TFileManip>
+inline ssize_t Writev(TFileManip& fileManip, const struct iovec* iov, int iovcnt) {
+    ssize_t written_count = 0;
+    for (int n = 0; n < iovcnt; n++) {
+        ssize_t last_write = fileManip.Write(iov[n].iov_base, iov[n].iov_len);
+        if (last_write < 0)
+            return -1;
+        written_count += last_write;
+        // Continuing after a partial write would put later buffers ahead of
+        // the unwritten tail of this one.
+        if ((size_t)last_write < iov[n].iov_len)
+            break;
+    }
+    return written_count;
+}
+
+//*********************************************************************
+// Indexer that does nothing: NextPage() ignores the page it is given.
+// It is the default TBaseIndexer of TOutputRecordIterator.
+struct TFakeIndexer {
+ inline void NextPage(TDatPage*) noexcept {
+ }
+};
+
+// Indexer that forwards every page passed to NextPage() to a user-supplied
+// C-style callback together with an opaque context pointer.
+struct TCallbackIndexer {
+    typedef void (*TCallback)(void* This, const TDatPage* page);
+
+    TCallbackIndexer()
+        : Callback(nullptr)
+        , This(nullptr)
+    {
+    }
+
+    // t is passed back verbatim as the first argument of c.
+    void SetCallback(void* t, TCallback c) {
+        This = t;
+        Callback = c;
+    }
+
+    // Invokes the callback for `dat`; a no-op until a callback has been set.
+    void NextPage(TDatPage* dat) {
+        if (Callback)
+            Callback(This, dat);
+    }
+
+    TCallback Callback;
+    void* This;
+};
+
+template <class TVal, typename TBasePageIter, typename TBaseIndexer = TFakeIndexer, typename TCompressor = TFakeCompression>
+class TOutputRecordIterator;
+
+// Record writer for uncompressed pages (TFakeCompression specialization).
+// Records are placed one after another, DatCeil-aligned, directly inside the
+// current page of TBasePageIter. When a record does not fit, the page header
+// is finalised, the indexer is notified and the next page is requested.
+template <class TVal, typename TBasePageIter, typename TBaseIndexer>
+class TOutputRecordIterator<TVal, TBasePageIter, TBaseIndexer, TFakeCompression>
+    : public TBasePageIter, public TBaseIndexer {
+public:
+    enum EOffset {
+        WrongOffset = size_t(-1)
+    };
+
+    typedef TBasePageIter TPageIter;
+    typedef TBaseIndexer TIndexer;
+
+    TOutputRecordIterator() {
+        Clear();
+    }
+
+    ~TOutputRecordIterator() {
+        Term();
+    }
+
+    // Last record handed out by Reserve()/Push(), or nullptr.
+    inline const TVal* Current() const {
+        return Rec;
+    }
+
+    // Copies *v (and, for types with ext info, the serialized extInfo or a
+    // zero length marker) into freshly reserved space.
+    // Returns the stored record or nullptr when no space could be reserved.
+    const TVal* Push(const TVal* v, const typename TExtInfoType<TVal>::TResult* extInfo = nullptr) {
+        NMicroBDB::AssertValid(v);
+        size_t len = SizeOf(v);
+        if (!TExtInfoType<TVal>::Exists)
+            return (Reserve(len)) ? (TVal*)memcpy(Rec, v, len) : nullptr;
+        else if (extInfo) {
+            size_t extSize = extInfo->ByteSize();
+            size_t extLenSize = len_long((i64)extSize);
+            if (!Reserve(len + extLenSize + extSize))
+                return nullptr;
+            memcpy(Rec, v, len);
+            out_long((i64)extSize, (char*)Rec + len);
+            extInfo->SerializeWithCachedSizesToArray((ui8*)Rec + len + extLenSize);
+            return Rec;
+        } else {
+            size_t extLenSize = len_long((i64)0);
+            if (!Reserve(len + extLenSize))
+                return nullptr;
+            memcpy(Rec, v, len);
+            out_long((i64)0, (char*)Rec + len);
+            return Rec;
+        }
+    }
+
+    // Same as above, but the ext info is given as extLen raw bytes that are
+    // copied right after the record.
+    const TVal* Push(const TVal* v, const ui8* extInfoRaw, size_t extLen) {
+        NMicroBDB::AssertValid(v);
+        size_t sz = SizeOf(v);
+        if (!Reserve(sz + extLen))
+            return nullptr;
+        memcpy(Rec, v, sz);
+        memcpy((ui8*)Rec + sz, extInfoRaw, extLen);
+        return Rec;
+    }
+
+    // use values stored in microbdb readers/writers internal buffer only.
+    // method expects serialized extInfo after this record
+    const TVal* PushWithExtInfo(const TVal* v) {
+        NMicroBDB::AssertValid(v);
+        size_t extSize;
+        size_t extLenSize;
+        size_t sz = NMicroBDB::SizeOfExt(v, &extLenSize, &extSize);
+        sz += extLenSize + extSize;
+        if (!Reserve(sz))
+            return nullptr;
+        memcpy(Rec, v, sz);
+        return Rec;
+    }
+
+    // Reserves `len` bytes (rounded up by DatCeil) for one record and returns
+    // a pointer to them. Returns nullptr when the record cannot fit into an
+    // empty page or when no further page is available.
+    TVal* Reserve(size_t len) {
+        if (CurLen + DatCeil(len) > TPageIter::GetPageSize()) {
+            if (sizeof(TDatPage) + DatCeil(len) > TPageIter::GetPageSize())
+                return Rec = nullptr;
+            if (TPageIter::Current() && RecNum) {
+                // Finalise the page being left: header, zeroed tail, index entry.
+                TPageIter::Current()->RecNum = RecNum;
+                TPageIter::Current()->Format = MBDB_FORMAT_RAW;
+                memset((char*)TPageIter::Current() + CurLen, 0, TPageIter::GetPageSize() - CurLen);
+                TIndexer::NextPage(TPageIter::Current());
+                RecNum = 0;
+            }
+            if (!TPageIter::Next()) {
+                CurLen = TPageIter::GetPageSize();
+                return Rec = nullptr;
+            }
+            CurLen = sizeof(TDatPage);
+        }
+        LenForOffset = CurLen;
+        Rec = (TVal*)((char*)TPageIter::Current() + CurLen);
+        DatSet(Rec, len);
+
+        CurLen += DatCeil(len);
+
+        ++RecNum;
+        return Rec;
+    }
+
+    // Stores the current record count and format in the header of the
+    // current page. Does nothing when there is no current page.
+    void Flush() {
+        if (!TPageIter::Current())
+            return;
+        TPageIter::Current()->RecNum = RecNum;
+        TPageIter::Current()->Format = MBDB_FORMAT_RAW;
+    }
+
+    // Offset of the current record as page offset plus position inside the
+    // page, or WrongOffset when there is no current record.
+    size_t Offset() const {
+        return Rec ? TPageIter::Offset() + LenForOffset : WrongOffset;
+    }
+
+    // Recomputes the write position from the current record's actual size
+    // (including its ext info, if the type has one).
+    void ResetDat() {
+        CurLen = (char*)Rec - (char*)TPageIter::Current();
+        size_t len;
+        if (!TExtInfoType<TVal>::Exists) {
+            len = SizeOf(Rec);
+        } else {
+            size_t ll;
+            size_t l;
+            len = NMicroBDB::SizeOfExt(Rec, &ll, &l);
+            len += ll + l;
+        }
+        CurLen += DatCeil(len);
+    }
+
+protected:
+    void Clear() {
+        Rec = nullptr;
+        RecNum = 0;
+        CurLen = 0;
+        LenForOffset = 0;
+    }
+
+    // Starts with a "full" position so that the first Reserve() asks for a page.
+    int Init() {
+        Clear();
+        CurLen = TPageIter::GetPageSize();
+        return 0;
+    }
+
+    // Finalises the current page, if any. Returns non-zero when records are
+    // pending but there is no page to hold them.
+    int Term() {
+        if (TPageIter::Current()) {
+            TPageIter::Current()->RecNum = RecNum;
+            TPageIter::Current()->Format = MBDB_FORMAT_RAW;
+            memset((char*)TPageIter::Current() + CurLen, 0, TPageIter::GetPageSize() - CurLen);
+            RecNum = 0;
+        }
+        int ret = !TPageIter::Current() && RecNum;
+        Clear();
+        return ret;
+    }
+
+    // Finalises the current page and moves the page iterator to `pageno`.
+    int GotoPage(int pageno) {
+        if (TPageIter::Current()) {
+            TPageIter::Current()->RecNum = RecNum;
+            TPageIter::Current()->Format = MBDB_FORMAT_RAW;
+            memset((char*)TPageIter::Current() + CurLen, 0, TPageIter::GetPageSize() - CurLen);
+        }
+        int ret = TPageIter::GotoPage(pageno);
+        if (!ret) {
+            RecNum = 0;
+            CurLen = sizeof(TDatPage);
+        }
+        return ret;
+    }
+
+    TVal* Rec;           // last reserved record
+    int RecNum;          // records on the current page
+    size_t CurLen;       // bytes used on the current page
+    size_t LenForOffset; // position of Rec inside the current page
+};
+
+// Record writer for compressed pages. Records are first collected in an
+// in-memory buffer; FlushBuffer() turns the buffer into one block (header,
+// first record verbatim, remaining records compressed by TAlgorithm) and
+// appends it to the current page, moving to the next page when it is full.
+template <class TVal, typename TBasePageIter, typename TBaseIndexer, typename TAlgorithm>
+class TOutputRecordIterator
+    : public TBasePageIter,
+      public TBaseIndexer,
+      private TAlgorithm {
+    // Staging area for records that have not been compressed yet.
+    class TPageBuffer {
+    public:
+        TPageBuffer()
+            : Size(0)
+            , Pos(0)
+            , RecNum(0)
+        {
+        }
+
+        void Init(size_t page) {
+            Pos = 0;
+            RecNum = 0;
+            Size = Min(page / 2, size_t(64 << 10));
+            Data.Reset(new ui8[Size]);
+        }
+
+        void Clear() {
+            Pos = 0;
+            RecNum = 0;
+        }
+
+        inline bool Empty() const {
+            return RecNum == 0;
+        }
+
+    public:
+        size_t Size; // capacity of Data
+        size_t Pos;  // bytes used in Data
+        int RecNum;  // records stored in Data
+        TArrayHolder<ui8> Data;
+    };
+
+public:
+    typedef TBasePageIter TPageIter;
+    typedef TBaseIndexer TIndexer;
+
+    TOutputRecordIterator()
+        : CurLen(sizeof(TDatPage) + sizeof(TCompressedPage))
+        , Rec(nullptr)
+        , Count(0)
+        , RecNum(0)
+    {
+    }
+
+    ~TOutputRecordIterator() {
+        Term();
+    }
+
+    // Last record handed out by Reserve()/Push(), or nullptr.
+    const TVal* Current() const {
+        return Rec;
+    }
+
+    // Copies *v (and, for types with ext info, the serialized extInfo or a
+    // zero length marker) into freshly reserved buffer space.
+    // Returns the stored record or nullptr when no space could be reserved.
+    const TVal* Push(const TVal* v, const typename TExtInfoType<TVal>::TResult* extInfo = nullptr) {
+        NMicroBDB::AssertValid(v);
+        size_t len = SizeOf(v);
+        if (!TExtInfoType<TVal>::Exists)
+            return (Reserve(len)) ? (TVal*)memcpy((TVal*)Rec, v, len) : nullptr;
+        else if (extInfo) {
+            size_t extSize = extInfo->ByteSize();
+            size_t extLenSize = len_long((i64)extSize);
+            if (!Reserve(len + extLenSize + extSize))
+                return nullptr;
+            memcpy(Rec, v, len);
+            out_long((i64)extSize, (char*)Rec + len);
+            extInfo->SerializeWithCachedSizesToArray((ui8*)Rec + len + extLenSize);
+            return Rec;
+        } else {
+            size_t extLenSize = len_long((i64)0);
+            if (!Reserve(len + extLenSize))
+                return nullptr;
+            memcpy(Rec, v, len);
+            out_long((i64)0, (char*)Rec + len);
+            return Rec;
+        }
+    }
+
+    // Same as above, but the ext info is given as extLen raw bytes that are
+    // copied right after the record.
+    const TVal* Push(const TVal* v, const ui8* extInfoRaw, size_t extLen) {
+        NMicroBDB::AssertValid(v);
+        size_t sz = SizeOf(v);
+        if (!Reserve(sz + extLen))
+            return NULL;
+        memcpy(Rec, v, sz);
+        memcpy((ui8*)Rec + sz, extInfoRaw, extLen);
+        return Rec;
+    }
+
+    // use values stored in microbdb readers/writers internal buffer only.
+    // method expects serialized extInfo after this record
+    const TVal* PushWithExtInfo(const TVal* v) {
+        NMicroBDB::AssertValid(v);
+        size_t extSize;
+        size_t extLenSize;
+        size_t sz = NMicroBDB::SizeOfExt(v, &extLenSize, &extSize);
+        sz += extLenSize + extSize;
+        if (!Reserve(sz))
+            return nullptr;
+        memcpy(Rec, v, sz);
+        return Rec;
+    }
+
+    // Reserves `len` bytes (rounded up by DatCeil) in the staging buffer.
+    // Flushes the buffer first when the record does not fit. Returns nullptr
+    // when no page is available, the flush fails, or the record is too large
+    // for the buffer.
+    TVal* Reserve(const size_t len) {
+        const size_t aligned = DatCeil(len);
+
+        if (!TPageIter::Current()) { // Allocate first page
+            if (!TPageIter::Next()) {
+                CurLen = TPageIter::GetPageSize();
+                return Rec = nullptr;
+            }
+            CurLen = sizeof(TDatPage) + sizeof(TCompressedPage);
+        }
+
+        if (Buffer.Pos + aligned > Buffer.Size) {
+            if (Buffer.Pos == 0)
+                return Rec = nullptr;
+            if (FlushBuffer())
+                return Rec = nullptr;
+            if (Buffer.Pos + aligned + sizeof(TDatPage) + sizeof(TCompressedPage) > Buffer.Size)
+                return Rec = nullptr;
+        }
+
+        Rec = (TVal*)((char*)Buffer.Data.Get() + Buffer.Pos);
+        DatSet(Rec, len); // len is correct because DatSet set align tail to zero
+
+        Buffer.RecNum++;
+        Buffer.Pos += aligned;
+        ++RecNum;
+        return Rec;
+    }
+
+    // Moves buffered records onto the page and updates the page header.
+    // When the buffer cannot be flushed (FlushBuffer() fails, which leaves no
+    // current page) the header update is skipped instead of dereferencing a
+    // null page.
+    void Flush() {
+        if (!Buffer.Empty()) {
+            if (FlushBuffer() || !TPageIter::Current())
+                return;
+            TPageIter::Current()->RecNum = RecNum;
+            TPageIter::Current()->Format = MBDB_FORMAT_COMPRESSED;
+        }
+    }
+
+    size_t Offset() const {
+        // According to vadya@ there is no evil to return 0 all the time
+        return 0;
+    }
+
+    // Recomputes the buffer write position from the current record's actual
+    // size, including its ext info when the type has one (the same sizing
+    // FlushBuffer() uses for the first record).
+    void ResetDat() {
+        Buffer.Pos = (char*)Rec - (char*)Buffer.Data.Get();
+        size_t len;
+        if (!TExtInfoType<TVal>::Exists) {
+            len = SizeOf(Rec);
+        } else {
+            size_t ll;
+            size_t l;
+            len = NMicroBDB::SizeOfExt(Rec, &ll, &l);
+            len += ll + l;
+        }
+        Buffer.Pos += DatCeil(len);
+    }
+
+protected:
+    void Clear() {
+        RecNum = 0;
+        Rec = nullptr;
+        Count = 0;
+        CurLen = sizeof(TDatPage) + sizeof(TCompressedPage);
+        Buffer.Clear();
+    }
+
+    int Init() {
+        Clear();
+        Buffer.Init(TPageIter::GetPageSize());
+        TAlgorithm::Init();
+        return 0;
+    }
+
+    // Commits the current page, if any. Returns non-zero when records are
+    // pending but there is no page to hold them.
+    int Term() {
+        if (TPageIter::Current())
+            Commit();
+        int ret = !TPageIter::Current() && RecNum;
+        Clear();
+        TAlgorithm::Term();
+        return ret;
+    }
+
+    // Commits the current page and moves the page iterator to `pageno`.
+    int GotoPage(int pageno) {
+        if (TPageIter::Current())
+            Commit();
+        int ret = TPageIter::GotoPage(pageno);
+        if (!ret)
+            Reset();
+        return ret;
+    }
+
+private:
+    // Flushes pending records and finalises the current page (headers and
+    // zeroed tail).
+    void Commit() {
+        Flush();
+        // A failed flush leaves no current page; keep RecNum so that Term()
+        // can report the pending records.
+        if (!TPageIter::Current())
+            return;
+        TPageIter::Current()->RecNum = RecNum;
+        TPageIter::Current()->Format = MBDB_FORMAT_COMPRESSED;
+        SetCompressedPageHeader();
+
+        memset((char*)TPageIter::Current() + CurLen, 0, TPageIter::GetPageSize() - CurLen);
+        RecNum = 0;
+        Count = 0;
+    }
+
+    // Fills the TCompressedPage header that follows TDatPage on the current page.
+    inline void SetCompressedPageHeader() {
+        TCompressedPage* const hdr = (TCompressedPage*)((ui8*)TPageIter::Current() + sizeof(TDatPage));
+
+        hdr->BlockCount = Count;
+        hdr->Algorithm = TAlgorithm::Code;
+        hdr->Version = 0;
+        hdr->Reserved = 0;
+    }
+
+    inline void Reset() {
+        RecNum = 0;
+        CurLen = sizeof(TDatPage) + sizeof(TCompressedPage);
+        Count = 0;
+        Buffer.Clear();
+    }
+
+    // Writes the staging buffer to the page as one block.
+    // Returns 0 on success or MBDB_NO_MEMORY when a new page is needed but
+    // cannot be obtained.
+    int FlushBuffer() {
+        TArrayHolder<ui8> data;
+        const ui8* const buf = Buffer.Data.Get();
+        size_t first = 0;
+
+        // Size of the first record, which is stored uncompressed.
+        if (!TExtInfoType<TVal>::Exists)
+            first = DatCeil(SizeOf((TVal*)buf));
+        else {
+            size_t ll;
+            size_t l;
+            first = NMicroBDB::SizeOfExt((const TVal*)buf, &ll, &l);
+            first = DatCeil(first + ll + l);
+        }
+
+        // Upper bound of the block size; `real` is refined after compression.
+        size_t total = sizeof(NMicroBDB::TCompressedHeader) + first + ((Buffer.RecNum == 1) ? 0 : TAlgorithm::CompressBound(Buffer.Pos - first));
+        size_t real = total;
+
+        {
+            ui8* p = nullptr;
+            NMicroBDB::TCompressedHeader* hdr = nullptr;
+
+            // 1. Choose data destination (temporary buffer or dat-page)
+            if (CurLen + total > TPageIter::GetPageSize()) {
+                data.Reset(new ui8[total]);
+
+                hdr = (NMicroBDB::TCompressedHeader*)data.Get();
+                p = data.Get() + sizeof(NMicroBDB::TCompressedHeader);
+            } else {
+                p = (ui8*)TPageIter::Current() + CurLen;
+                hdr = (NMicroBDB::TCompressedHeader*)p;
+                p += sizeof(NMicroBDB::TCompressedHeader);
+            }
+
+            // 2. Compress data
+
+            // Fill header and first record
+            hdr->Original = Buffer.Pos;
+            hdr->Compressed = 0;
+            hdr->Count = Buffer.RecNum;
+            hdr->Reserved = 0;
+            memcpy(p, Buffer.Data.Get(), first);
+            // Fill compressed part
+            if (Buffer.RecNum > 1) {
+                size_t size = TAlgorithm::CompressBound(Buffer.Pos - first);
+
+                p += first;
+                TAlgorithm::Compress(p, size, buf + first, Buffer.Pos - first);
+
+                hdr->Compressed = size;
+
+                real = sizeof(NMicroBDB::TCompressedHeader) + first + size;
+            }
+        }
+
+        Y_ASSERT(sizeof(TDatPage) + sizeof(TCompressedPage) + real <= TPageIter::GetPageSize());
+
+        // 3. Check page capacity
+
+        if (CurLen + real > TPageIter::GetPageSize()) {
+            Y_ASSERT(data.Get() != nullptr);
+
+            if (TPageIter::Current() && RecNum) {
+                // The buffered records go to the next page, so they are not
+                // counted on the page being finalised.
+                RecNum = RecNum - Buffer.RecNum;
+                TPageIter::Current()->RecNum = RecNum;
+                TPageIter::Current()->Format = MBDB_FORMAT_COMPRESSED;
+                SetCompressedPageHeader();
+                memset((char*)TPageIter::Current() + CurLen, 0, TPageIter::GetPageSize() - CurLen);
+                TIndexer::NextPage(TPageIter::Current());
+                RecNum = Buffer.RecNum;
+                Count = 0;
+            }
+            if (!TPageIter::Next()) {
+                CurLen = TPageIter::GetPageSize();
+                return MBDB_NO_MEMORY;
+            }
+            CurLen = sizeof(TDatPage) + sizeof(TCompressedPage);
+        }
+
+        // 4. Flush data and reset buffer state
+
+        if (data.Get())
+            memcpy((ui8*)TPageIter::Current() + CurLen, data.Get(), real);
+        CurLen += real;
+        ++Count;
+        Buffer.Clear();
+        return 0;
+    }
+
+private:
+    size_t CurLen;      // bytes used on the current page
+    TPageBuffer Buffer; // records not yet compressed
+    TVal* Rec;          // last reserved record (inside Buffer)
+    ui32 Count;         //! < count of compressed blocks on page
+public:
+    int RecNum;         // records belonging to the current page, buffered ones included
+};
+
+// Output page iterator: hands out pages from a buffer of Bufpages pages that
+// is used as a ring (page number modulo Bufpages) and writes finished pages
+// through TBaseWriter::WritePages(). Pages from the "frozen" page onwards are
+// held back from writing until Unfreeze() is called.
+template <typename TBaseWriter>
+class TOutputPageIterator: public TBaseWriter {
+public:
+ typedef TBaseWriter TWriter;
+
+ TOutputPageIterator()
+ : Buf(nullptr)
+ {
+ Clear();
+ }
+
+ ~TOutputPageIterator() {
+ Term();
+ }
+
+ // Page selected by the last Next()/GotoPage(), or nullptr.
+ TDatPage* Current() {
+ return CurPage;
+ }
+
+ // Byte offset of the current page, computed from PageNum and the page size.
+ size_t Offset() const {
+ //Cout << "PS = " << TWriter::GetPageSize() << "; PN = " << PageNum << "; MS = " << METASIZE << Endl;
+ return TWriter::GetPageSize() * PageNum + METASIZE;
+ }
+
+ // Marks the current page (or page 0 when there is none yet) as frozen and
+ // returns its number.
+ int Freeze() {
+ return (Frozen = (PageNum == -1) ? 0 : (int)PageNum);
+ }
+
+ void Unfreeze() {
+ Frozen = -1;
+ }
+
+ // Zero when not frozen, otherwise the frozen page number plus one.
+ inline int IsFrozen() const {
+ return Frozen + 1;
+ }
+
+ inline size_t GetPageSize() const {
+ return TWriter::GetPageSize();
+ }
+
+ inline int GetPageNum() const {
+ return (int)PageNum;
+ }
+
+ // Advances to the next page, writing the buffer out first when the last
+ // buffered page has been reached. Returns nullptr when that write fails.
+ // Only the TDatPage header of the new page is zeroed.
+ TDatPage* Next() {
+ if (PageNum >= Maxpage && WriteBuf())
+ return CurPage = nullptr;
+ CurPage = (TDatPage*)(Buf + ((++PageNum) % Bufpages) * GetPageSize());
+ memset(CurPage, 0, sizeof(TDatPage));
+ return CurPage;
+ }
+
+protected:
+ // Allocates the page buffer. `pages` is a page count when pagesOrBytes is
+ // non-zero and a byte size otherwise; at least one page is allocated.
+ // Freezes immediately when the writer's Memo flag is set.
+ int Init(size_t pages, int pagesOrBytes) {
+ Term();
+ if (pagesOrBytes)
+ Bufpages = pages;
+ else
+ Bufpages = pages / GetPageSize();
+ Bufpages = Max<size_t>(1, Bufpages);
+ Maxpage = Bufpages - 1;
+ // if (!(Buf = (char*)malloc(Bufpages * GetPageSize())))
+ // return ENOMEM;
+ ABuf.Alloc(Bufpages * GetPageSize());
+ Buf = ABuf.Begin();
+ if (TWriter::Memo)
+ Freeze();
+ return 0;
+ }
+
+ // Unfreezes, writes out whatever is buffered and releases the buffer.
+ // Returns the result of the final write (0 when nothing was started).
+ int Term() {
+ Unfreeze();
+ int ret = (PageNum < 0) ? 0 : WriteBuf();
+ Clear();
+ return ret;
+ }
+
+ // Writes out buffered pages and repositions both the writer and the ring
+ // at `pageno`. Returns EAGAIN while frozen, otherwise the first error from
+ // WriteBuf() or TWriter::GotoPage(), or 0.
+ int GotoPage(int pageno) {
+ int ret = EAGAIN;
+ if (IsFrozen() || PageNum >= 0 && ((ret = WriteBuf())) || ((ret = TWriter::GotoPage(pageno))))
+ return ret;
+ PageNum = pageno;
+ Maxpage = Bufpages - 1 + pageno;
+ CurPage = (TDatPage*)(Buf + (PageNum % Bufpages) * GetPageSize());
+ memset(CurPage, 0, sizeof(TDatPage));
+ return 0;
+ }
+
+ void Clear() {
+ ABuf.Dealloc();
+ Buf = nullptr;
+ Maxpage = PageNum = Frozen = -1;
+ Bufpages = 0;
+ CurPage = nullptr;
+ }
+
+ // Writes pages [Maxpage - Bufpages + 1, last] to the writer, where `last`
+ // is PageNum, or Frozen - 1 while frozen. Returns EAGAIN when that range
+ // is empty and EIO when the writer fails. The range may wrap around the
+ // end of the ring buffer, in which case it is passed as two iovecs.
+ // On success Maxpage grows by the number of pages written.
+ int WriteBuf() {
+ int nvec;
+ iovec vec[2];
+ ssize_t minpage = Maxpage - Bufpages + 1;
+ ssize_t maxpage = Frozen == -1 ? PageNum : Frozen - 1;
+ if (maxpage < minpage)
+ return EAGAIN;
+ // From here on both values are slot indices inside the ring.
+ minpage %= Bufpages;
+ maxpage %= Bufpages;
+ if (maxpage < minpage) {
+ vec[0].iov_base = Buf + GetPageSize() * minpage;
+ vec[0].iov_len = GetPageSize() * (Bufpages - minpage);
+ vec[1].iov_base = Buf;
+ vec[1].iov_len = GetPageSize() * (maxpage + 1);
+ nvec = 2;
+ } else {
+ vec[0].iov_base = Buf + GetPageSize() * minpage;
+ vec[0].iov_len = GetPageSize() * (maxpage - minpage + 1);
+ nvec = 1;
+ }
+ if (TWriter::WritePages(vec, nvec))
+ return EIO;
+ Maxpage += (maxpage < minpage) ? (Bufpages - minpage + maxpage + 1) : (maxpage - minpage + 1);
+ return 0;
+ }
+
+ ssize_t Maxpage; // number of the last page that fits into the buffer before a write is needed
+ ssize_t Bufpages; // capacity of the buffer in pages
+ ssize_t PageNum; // number of the current page, -1 when none
+ int Frozen; // frozen page number, -1 when not frozen
+ TDatPage* CurPage;
+ char* Buf; // start of ABuf
+ TMappedAllocation ABuf;
+};
+
+// Lowest layer of the dat-file writer: owns the file manipulator, writes the
+// meta page on Init() and whole pages through WritePages(). In "memo" mode
+// (Init with a null file name or null stream) nothing is written at all.
+template <class TFileManip>
+class TOutputPageFileImpl: private TNonCopyable {
+public:
+    TOutputPageFileImpl()
+        : Pagesize(0)
+        , Eof(1)
+        , Error(0)
+        , Memo(0)
+        , Recordsig(0)
+    {
+    }
+
+    ~TOutputPageFileImpl() {
+        Term();
+    }
+
+    inline int IsEof() const {
+        return Eof;
+    }
+
+    inline int GetError() const {
+        return Error;
+    }
+
+    inline bool IsOpen() const {
+        return FileManip.IsOpen();
+    }
+
+    inline size_t GetPageSize() const {
+        return Pagesize;
+    }
+
+    inline ui32 GetRecordSig() const {
+        return Recordsig;
+    }
+
+    // Creates (or truncates) the file `fname` and writes the meta page.
+    // A null fname selects memo mode. When writing the meta page fails the
+    // file is closed and removed. Returns 0 or an error code.
+    int Init(const char* fname, size_t pagesize, ui32 recsig, bool direct = false) {
+        Memo = 0;
+        if (FileManip.IsOpen())
+            return MBDB_ALREADY_INITIALIZED;
+
+        if (!fname) {
+            Eof = Error = 0;
+            Pagesize = pagesize;
+            Recordsig = recsig;
+            Memo = 1;
+            return 0;
+        }
+
+        Error = FileManip.Open(fname, WrOnly | CreateAlways | ARW | AWOther | (direct ? DirectAligned : EOpenMode()));
+        if (Error)
+            return Error;
+        Error = Init(TFile(), pagesize, recsig);
+        if (Error) {
+            FileManip.Close();
+            unlink(fname);
+        }
+        return Error;
+    }
+
+    // Same as above for an output stream; a null stream selects memo mode.
+    int Init(TAutoPtr<IOutputStream> output, size_t pagesize, ui32 recsig) {
+        Memo = 0;
+        if (FileManip.IsOpen()) {
+            return MBDB_ALREADY_INITIALIZED;
+        }
+
+        if (!output) {
+            Eof = Error = 0;
+            Pagesize = pagesize;
+            Recordsig = recsig;
+            Memo = 1;
+            return 0;
+        }
+
+        Error = FileManip.Open(output);
+        if (Error)
+            return Error;
+        Error = Init(TFile(), pagesize, recsig);
+        if (Error) {
+            FileManip.Close();
+        }
+        return Error;
+    }
+
+    // Writes the meta page to `file` when it is open, otherwise to the
+    // already opened FileManip. Returns 0 or an error code; on a write
+    // failure the writer is terminated.
+    int Init(const TFile& file, size_t pagesize, ui32 recsig) {
+        Memo = 0;
+        if (!file.IsOpen() && !FileManip.IsOpen())
+            return MBDB_NOT_INITIALIZED;
+        if (file.IsOpen() && FileManip.IsOpen())
+            return MBDB_ALREADY_INITIALIZED;
+        if (file.IsOpen()) {
+            Error = FileManip.Init(file);
+            if (Error)
+                return Error;
+        }
+
+        Eof = 1;
+        // The meta page is built at an FS_BLOCK_SIZE-aligned address inside
+        // an over-allocated temporary buffer.
+        TTempBuf buf(METASIZE + FS_BLOCK_SIZE);
+        const char* ptr = (buf.Data() + FS_BLOCK_SIZE - ((ui64)buf.Data() & (FS_BLOCK_SIZE - 1)));
+        TDatMetaPage* meta = (TDatMetaPage*)ptr;
+
+        memset(buf.Data(), 0, buf.Size());
+        meta->MetaSig = METASIG;
+        meta->PageSize = Pagesize = pagesize;
+        meta->RecordSig = Recordsig = recsig;
+
+        ssize_t size = METASIZE, ret = 0;
+        while (size && (ret = FileManip.Write(ptr, (unsigned)size)) > 0) {
+            size -= ret;
+            ptr += ret;
+        }
+        if (size || ret <= 0) {
+            // Term() closes the file, which may overwrite errno; keep the
+            // value that belongs to the failed write.
+            const int writeErrno = errno;
+            Term();
+            return Error = writeErrno ? writeErrno : MBDB_WRITE_ERROR;
+        }
+
+        Error = Eof = 0;
+        return Error;
+    }
+
+protected:
+    // Stamps PAGESIG into every page of the given buffers and writes them.
+    // Partial writes are resumed; the iovecs are restored to their original
+    // extent before returning. Returns 0 or an error code (stored in Error).
+    // In memo mode, or with an error pending, nothing is done.
+    int WritePages(iovec* vec, int nvec) {
+        if (Error || Memo)
+            return Error;
+
+        ssize_t size, delta;
+        iovec* pvec;
+        int vsize;
+
+        for (vsize = 0, pvec = vec; vsize < nvec; vsize++, pvec++)
+            for (size = 0; (size_t)size < pvec->iov_len; size += Pagesize)
+                ((TDatPage*)((char*)pvec->iov_base + size))->PageSig = PAGESIG;
+
+        delta = size = 0;
+        pvec = vec;
+        vsize = nvec;
+        while (vsize && (size = Writev(FileManip, pvec, (int)Min(vsize, 16))) > 0) {
+            // Undo the temporary adjustment of the current iovec and count
+            // the bytes it had already sent as part of this round.
+            if (delta) {
+                size += delta;
+                pvec->iov_len += delta;
+                pvec->iov_base = (char*)pvec->iov_base - delta;
+                delta = 0;
+            }
+            // Skip fully written iovecs; remember a partially written one.
+            while (size) {
+                if ((size_t)size >= pvec->iov_len) {
+                    size -= pvec->iov_len;
+                    ++pvec;
+                    --vsize;
+                } else {
+                    delta = size;
+                    pvec->iov_len -= size;
+                    pvec->iov_base = (char*)pvec->iov_base + size;
+                    size = 0;
+                }
+            }
+        }
+        if (delta) {
+            pvec->iov_len += delta;
+            pvec->iov_base = (char*)pvec->iov_base - delta;
+        }
+        return Error = (!size && !vsize) ? 0 : errno ? errno : MBDB_WRITE_ERROR;
+    }
+
+    i64 Tell() {
+        return FileManip.RealSeek(0, SEEK_CUR);
+    }
+
+    // Positions the file at the start of page `pageno`. Sets Error to
+    // MBDB_BAD_FILE_SIZE when the seek does not land on the requested offset.
+    int GotoPage(int pageno) {
+        if (Error || Memo)
+            return Error;
+        Eof = 0;
+        i64 offset = (i64)pageno * Pagesize + METASIZE;
+        if (offset != FileManip.Seek(offset, SEEK_SET))
+            Error = MBDB_BAD_FILE_SIZE;
+        return Error;
+    }
+
+    // Closes the file. An error recorded earlier takes precedence over the
+    // result of the close.
+    int Term() {
+        int ret = FileManip.Close();
+        Eof = 1;
+        Memo = 0;
+        if (!Error)
+            Error = ret;
+        return Error;
+    }
+
+    size_t Pagesize;
+    int Eof;
+    int Error;
+    int Memo; // non-zero in memo mode (no file behind the writer)
+    ui32 Recordsig;
+
+private:
+    TFileManip FileManip;
+};
+
+using TOutputPageFile = TOutputPageFileImpl<TOutputFileManip>;
+
+// Output dat-file: a record iterator stacked on a page iterator stacked on a page writer.
+// Every Open() overload initialises the writer first and then the two iterator layers;
+// Close() tears the layers down in the opposite order.
+template <class TVal,
+          typename TBaseRecIter = TOutputRecordIterator<TVal, TOutputPageIterator<TOutputPageFile>>>
+class TOutDatFileImpl: public TBaseRecIter {
+public:
+    typedef TBaseRecIter TRecIter;
+    typedef typename TRecIter::TPageIter TPageIter;
+    typedef typename TRecIter::TPageIter::TWriter TWriter;
+
+    // Opens by file name. Returns 0 on success or the first non-zero status encountered.
+    int Open(const char* fname, size_t pagesize, size_t pages = 1, int pagesOrBytes = 1, bool direct = false) {
+        if (const int status = TWriter::Init(fname, pagesize, TVal::RecordSig, direct))
+            return status;
+        return Open2(pages, pagesOrBytes);
+    }
+
+    // Opens on top of an existing TFile.
+    int Open(const TFile& file, size_t pagesize, size_t pages = 1, int pagesOrBytes = 1) {
+        if (const int status = TWriter::Init(file, pagesize, TVal::RecordSig))
+            return status;
+        return Open2(pages, pagesOrBytes);
+    }
+
+    // Opens on top of an output stream handed over by the caller.
+    int Open(TAutoPtr<IOutputStream> output, size_t pagesize, size_t pages = 1, int pagesOrBytes = 1) {
+        if (const int status = TWriter::Init(output, pagesize, TVal::RecordSig))
+            return status;
+        return Open2(pages, pagesOrBytes);
+    }
+
+    // Terminates all three layers unconditionally and reports the first failure,
+    // checked in the order record iterator, page iterator, writer.
+    int Close() {
+        const int recStatus = TRecIter::Term();
+        const int pageStatus = TPageIter::Term();
+        const int writerStatus = TWriter::Term();
+        if (recStatus)
+            return recStatus;
+        if (pageStatus)
+            return pageStatus;
+        return writerStatus;
+    }
+
+private:
+    // Second phase of Open(): initialises the page and record iterators.
+    // On failure everything is closed again and the failing status is returned.
+    int Open2(size_t pages, int pagesOrBytes) {
+        int status = TPageIter::Init(pages, pagesOrBytes);
+        if (status == 0)
+            status = TRecIter::Init();
+        if (status != 0)
+            Close();
+        return status;
+    }
+};
+
+// Index output file. While records are written starting at page 1, the first record of every
+// completed page is collected in Index0 (through the TCallbackIndexer callback); on Close() the
+// collected records are written into page 0.
+template <class TVal>
+class TOutIndexFile: public TOutDatFileImpl<
+                         TVal,
+                         TOutputRecordIterator<TVal, TOutputPageIterator<TOutputPageFile>, TCallbackIndexer, TFakeCompression>> {
+    typedef TOutDatFileImpl<
+        TVal,
+        TOutputRecordIterator<TVal, TOutputPageIterator<TOutputPageFile>, TCallbackIndexer, TFakeCompression>>
+        TDatFile;
+    typedef TOutIndexFile<TVal> TMyType;
+    typedef typename TDatFile::TRecIter TRecIter;
+    typedef typename TRecIter::TPageIter TPageIter;
+    typedef typename TRecIter::TIndexer TIndexer;
+
+public:
+    TOutIndexFile() {
+        TIndexer::SetCallback(this, DispatchCallback);
+    }
+
+    // Opens the file and positions the writer at page 1, leaving page 0 for Close().
+    // Returns 0 on success; on failure the file is closed and the status returned.
+    int Open(const char* fname, size_t pagesize, size_t pages, int pagesOrBytes = 1) {
+        int ret = TDatFile::Open(fname, pagesize, pages, pagesOrBytes);
+        if (ret)
+            return ret;
+        if ((ret = TRecIter::GotoPage(1))) {
+            TDatFile::Close();
+            return ret;
+        }
+        Index0.Clear();
+        return ret;
+    }
+
+    // Flushes pending records, writes the collected first-records into page 0 and closes the file.
+    // Returns the first error encountered (index writing takes precedence over the close status).
+    int Close() {
+        TPageIter::Unfreeze();
+        if (TRecIter::RecNum) {
+            TRecIter::Flush();
+            // The last, partially filled page has not triggered the callback yet.
+            NextPage(TPageIter::Current());
+        }
+        int ret = 0;
+        if (Index0.Size() && !(ret = TRecIter::GotoPage(0))) {
+            // Index0 is a sequence of [size_t length][length bytes of record] entries.
+            const char* ptr = Index0.Begin();
+            size_t recSize;
+            while (ptr < Index0.End()) {
+                Y_ASSERT((size_t)(Index0.End() - ptr) >= sizeof(size_t));
+                memcpy(&recSize, ptr, sizeof(size_t));
+                ptr += sizeof(size_t);
+                Y_ASSERT((size_t)(Index0.End() - ptr) >= recSize);
+                ui8* buf = (ui8*)TRecIter::Reserve(recSize);
+                if (!buf) {
+                    ret = MBDB_PAGE_OVERFLOW;
+                    break;
+                }
+                memcpy(buf, ptr, recSize);
+                TRecIter::ResetDat();
+                ptr += recSize;
+            }
+            Index0.Clear();
+            // Keep an overflow detected inside the loop; previously it was overwritten here
+            // and could be reported as success.
+            if (!ret)
+                ret = (TPageIter::GetPageNum() != 0) ? MBDB_PAGE_OVERFLOW : TPageIter::GetError();
+        }
+        int ret1 = TDatFile::Close();
+        return ret ? ret : ret1;
+    }
+
+protected:
+    // Serialized first records of all data pages, see Close() for the layout.
+    TBuffer Index0;
+
+    // Appends the first record of `page` (including its ext-info part when TVal has one) to Index0.
+    void NextPage(const TDatPage* page) {
+        const TVal* first = (const TVal*)NMicroBDB::GetFirstRecord(page);
+        size_t sz;
+        if (!TExtInfoType<TVal>::Exists) {
+            sz = SizeOf(first);
+        } else {
+            size_t ll;
+            size_t l;
+            sz = NMicroBDB::SizeOfExt(first, &ll, &l);
+            sz += ll + l;
+        }
+        Index0.Append((const char*)&sz, sizeof(size_t));
+        Index0.Append((const char*)first, sz);
+    }
+
+    // Trampoline registered with the indexer; `This` is the TOutIndexFile passed to SetCallback.
+    static void DispatchCallback(void* This, const TDatPage* page) {
+        ((TMyType*)This)->NextPage(page);
+    }
+};
+
+// Data file written together with a companion key file (TOutIndexFile<TKey>).
+// Whenever the indexer reports a page, a key built from that page's first record is pushed to KeyFile.
+template <class TVal, class TKey, typename TCompressor = TFakeCompression, class TPageFile = TOutputPageFile>
+class TOutDirectFileImpl: public TOutDatFileImpl<
+ TVal,
+ TOutputRecordIterator<TVal, TOutputPageIterator<TPageFile>, TCallbackIndexer, TCompressor>> {
+ typedef TOutDatFileImpl<
+ TVal,
+ TOutputRecordIterator<TVal, TOutputPageIterator<TPageFile>, TCallbackIndexer, TCompressor>>
+ TDatFile;
+ typedef TOutDirectFileImpl<TVal, TKey, TCompressor, TPageFile> TMyType;
+ typedef typename TDatFile::TRecIter TRecIter;
+ typedef typename TRecIter::TPageIter TPageIter;
+ typedef typename TRecIter::TIndexer TIndexer;
+ typedef TOutIndexFile<TKey> TKeyFile;
+
+public:
+ TOutDirectFileImpl() {
+ TIndexer::SetCallback(this, DispatchCallback);
+ }
+
+ // Opens the data file `fname` and then the key file whose name DatNameToIdx produces from it.
+ // ipagesize == 0 means "use the data page size" for the key file.
+ // Returns 0 on success; on any failure the data file is closed and the status returned.
+ int Open(const char* fname, size_t pagesize, int pages = 1, size_t ipagesize = 0, size_t ipages = 1, int pagesOrBytes = 1) {
+ char iname[FILENAME_MAX];
+ int ret;
+ if (ipagesize == 0)
+ ipagesize = pagesize;
+ ret = TDatFile::Open(fname, pagesize, pages, pagesOrBytes);
+ // Each following step runs only while ret is still 0.
+ ret = ret ? ret : DatNameToIdx(iname, fname);
+ ret = ret ? ret : KeyFile.Open(iname, ipagesize, ipages, pagesOrBytes);
+ if (ret)
+ TDatFile::Close();
+ return ret;
+ }
+
+ // Flushes pending records, emits the key for the last page, then closes both files.
+ // The data-file status takes precedence over the key-file status.
+ int Close() {
+ if (TRecIter::RecNum) {
+ TRecIter::Flush();
+ NextPage(TPageIter::Current());
+ }
+ int ret = KeyFile.Close();
+ int ret1 = TDatFile::Close();
+ return ret1 ? ret1 : ret;
+ }
+
+ // Error of the data file if any, otherwise error of the key file.
+ int GetError() const {
+ return TDatFile::GetError() ? TDatFile::GetError() : KeyFile.GetError();
+ }
+
+protected:
+ TKeyFile KeyFile;
+
+ // Builds a key from the first record of `page` and pushes it to KeyFile.
+ void NextPage(const TDatPage* page) {
+ typedef TMakeExtKey<TVal, TKey> TMakeExtKey;
+
+ TVal* val = (TVal*)NMicroBDB::GetFirstRecord(page);
+ TKey key;
+ if (!TMakeExtKey::Exists) {
+ TMakeExtKey::Make(&key, nullptr, val, nullptr);
+ KeyFile.Push(&key);
+ } else {
+ size_t ll;
+ size_t l;
+ size_t sz = NMicroBDB::SizeOfExt(val, &ll, &l);
+ typename TExtInfoType<TVal>::TResult valExt;
+ // The parse result is deliberately ignored (see the SUPPRESS_NODISCARD macro).
+ if (TExtInfoType<TVal>::Exists)
+ Y_PROTOBUF_SUPPRESS_NODISCARD valExt.ParseFromArray((ui8*)val + sz + ll, l);
+ typename TExtInfoType<TKey>::TResult keyExt;
+ TMakeExtKey::Make(&key, &keyExt, val, &valExt);
+ KeyFile.Push(&key, &keyExt);
+ }
+ }
+
+ // Trampoline registered with the indexer; `This` is the object passed to SetCallback.
+ static void DispatchCallback(void* This, const TDatPage* page) {
+ ((TMyType*)This)->NextPage(page);
+ }
+};
diff --git a/library/cpp/microbdb/powersorter.h b/library/cpp/microbdb/powersorter.h
new file mode 100644
index 0000000000..c40de9c23f
--- /dev/null
+++ b/library/cpp/microbdb/powersorter.h
@@ -0,0 +1,667 @@
+#pragma once
+
+#include "safeopen.h"
+
+#include <util/generic/vector.h>
+#include <util/generic/deque.h>
+#include <util/system/mutex.h>
+#include <util/system/condvar.h>
+#include <util/thread/pool.h>
+
+// In-memory sort buffer: records are copied into a memo file (Memo) and pointers to the
+// stored copies are kept in Vector, which is what actually gets sorted and iterated.
+template <
+    class TRecord,
+    template <typename T> class TCompare,
+    class TSieve,
+    class TMemoFile = TOutDatFile<TRecord>>
+class TDatSorterBuf {
+public:
+    typedef TRecord TRec;
+    typedef TVector<TRec*> TVectorType;
+    typedef TMemoFile TMemo;
+    typedef TCompare<TRecord> TComp;
+
+public:
+    TDatSorterBuf(size_t memory, size_t pageSize)
+        : Memo("memo", pageSize, memory, 0)
+        , Cur()
+    {
+        Memo.Open(nullptr);
+        Memo.Freeze();
+        // Give the read cursor a defined value so that Next() before Sort()/Clear()
+        // compares valid iterators instead of reading an uninitialized one.
+        Ptr = Vector.begin();
+    }
+
+    ~TDatSorterBuf() {
+        Vector.clear();
+        Memo.Close();
+    }
+
+    // Stores a copy of *v. Returns the stored copy, or nullptr when Memo.Push() returned nullptr.
+    const TRec* Push(const TRec* v) {
+        const TRec* u = Memo.Push(v);
+        if (u)
+            Vector.push_back((TRec*)u);
+        return u;
+    }
+
+    // Advances to the next record and returns it, or nullptr at the end.
+    // With a real sieve, following records for which TSieve::Sieve(Cur, next) is true are skipped,
+    // and on reaching the end TSieve::Sieve(Cur, Cur) is called once for the last record.
+    const TRec* Next() {
+        if (Ptr == Vector.end()) {
+            if (Cur)
+                TSieve::Sieve(Cur, Cur);
+            Cur = nullptr;
+        } else {
+            Cur = *Ptr++;
+            if (!TIsSieveFake<TSieve>::Result)
+                while (Ptr != Vector.end() && TSieve::Sieve(Cur, *Ptr))
+                    ++Ptr;
+        }
+        return Cur;
+    }
+
+    // Record returned by the last Next(), or nullptr.
+    const TRec* Current() {
+        return Cur;
+    }
+
+    // Number of records pushed since construction or the last Clear().
+    size_t Size() {
+        return Vector.size();
+    }
+
+    // Sorts the stored records with TComp and rewinds the read cursor.
+    void Sort() {
+        Ptr = Vector.begin();
+        Cur = nullptr;
+
+        MBDB_SORT_FUN(Vector.begin(), Vector.end(), TComp());
+    }
+
+    // Drops all record pointers, calls Memo.Freeze() and rewinds the read cursor.
+    void Clear() {
+        Vector.clear();
+        Memo.Freeze();
+        Ptr = Vector.begin();
+        Cur = nullptr;
+    }
+
+private:
+    TVectorType Vector;
+    TMemo Memo;
+
+    typename TVectorType::iterator
+        Ptr;
+    TRec* Cur;
+};
+
+// Merges several inputs through a heap iterator, optionally collapsing records with TSieve.
+template <
+    class TRecord,
+    class TInput,
+    template <typename T> class TCompare,
+    class TSieve>
+class TDatMerger {
+public:
+    typedef TRecord TRec;
+    typedef TCompare<TRecord> TComp;
+    typedef TSimpleSharedPtr<TInput> TInputPtr;
+    typedef TVector<TInputPtr> TInputVector;
+
+public:
+    ~TDatMerger() {
+        Close();
+    }
+
+    // Takes shared ownership of `inputs` and starts a merge over them.
+    // With a real sieve the first record is pre-fetched into PNext.
+    void Init(const TInputVector& inputs) {
+        Inputs = inputs;
+        // Forget records of a previous merge: they point into inputs that may be gone.
+        PCur = nullptr;
+        PNext = nullptr;
+        TVector<TInput*> v;
+        v.reserve(Inputs.size());
+        for (int i = 0; i < Inputs.ysize(); ++i)
+            v.push_back(Inputs[i].Get());
+        // data() is well defined for an empty vector, unlike &v[0].
+        HeapIter.Init(v.data(), v.size());
+        if (!TIsSieveFake<TSieve>::Result)
+            PNext = HeapIter.Next();
+    }
+
+    // Returns the next merged record or nullptr at the end.
+    // With a real sieve the record is copied into Cur, following records accepted by
+    // TSieve::Sieve(PCur, next) are folded into it, and at the end of input
+    // TSieve::Sieve(PCur, PCur) is called once before nullptr is returned.
+    const TRec* Next() {
+        if (TIsSieveFake<TSieve>::Result) {
+            return HeapIter.Next();
+        }
+
+        if (!PNext) {
+            if (PCur) {
+                TSieve::Sieve(PCur, PCur);
+                PCur = nullptr;
+            }
+            return nullptr;
+        }
+
+        PCur = &Cur;
+        // NOTE(review): copies SizeOf(record) bytes into a TRec-sized member; assumes
+        // SizeOf never exceeds sizeof(TRec) for records used with a real sieve - confirm.
+        memcpy(PCur, PNext, SizeOf((const TRec*)PNext));
+
+        do {
+            PNext = HeapIter.Next();
+        } while (PNext && TSieve::Sieve(PCur, PNext));
+
+        return PCur;
+    }
+
+    // Record returned by the last Next().
+    const TRec* Current() {
+        return (TIsSieveFake<TSieve>::Result ? HeapIter.Current() : PCur);
+    }
+
+    // Releases the inputs and terminates the heap iterator.
+    void Close() {
+        Inputs.clear();
+        HeapIter.Term();
+        // The look-ahead record pointed into a released input; drop it so a later
+        // Next() returns nullptr instead of touching freed memory.
+        PCur = nullptr;
+        PNext = nullptr;
+    }
+
+private:
+    TInputVector Inputs;
+    THeapIter<TRec, TInput, TComp> HeapIter;
+    TRec Cur;
+    TRec* PCur = nullptr;
+    const TRec* PNext = nullptr;
+};
+
+// Hands out numbered temporary file names ("portions") inside a temp directory.
+// All public methods serialize on an internal mutex.
+class TPortionManager {
+public:
+    // Remembers the directory; the file name template is built lazily by the first Next().
+    void Open(const char* tempDir) {
+        TGuard<TMutex> guard(Mutex);
+        TempDir = tempDir;
+    }
+
+    // Returns the file name of the next portion. Throws yexception when the
+    // name template cannot be created for the configured directory.
+    TString Next() {
+        TGuard<TMutex> guard(Mutex);
+        if (!Portions)
+            DoOpen();
+        const int index = Portions++;
+        return GeneratePortionFilename(index);
+    }
+
+    // Restarts numbering; the next Next() builds a fresh template.
+    void Close() {
+        TGuard<TMutex> guard(Mutex);
+        Portions = 0;
+    }
+
+private:
+    // Fills PortionFilenameTempl for TempDir; on failure clears it and throws.
+    void DoOpen() {
+        if (!MakeSorterTempl(PortionFilenameTempl, TempDir.data()))
+            return;
+        PortionFilenameTempl[0] = 0;
+        ythrow yexception() << "portion-manager: bad tempdir \"" << TempDir.data() << "\": " << LastSystemErrorText();
+    }
+
+    // Formats the template (used as a printf format) with the portion number.
+    TString GeneratePortionFilename(int i) {
+        char formatted[FILENAME_MAX];
+        snprintf(formatted, sizeof(formatted), PortionFilenameTempl, i);
+        return TString(formatted);
+    }
+
+private:
+    TMutex Mutex;
+
+    TString TempDir;
+    char PortionFilenameTempl[FILENAME_MAX] = {};
+    int Portions = 0;
+};
+
+// A merger powered by threads
+template <
+ class TRecord,
+ template <typename T> class TCompare,
+ class TSieve,
+ class TInput = TInDatFile<TRecord>,
+ class TOutput = TOutDatFile<TRecord>>
+class TPowerMerger {
+public:
+ typedef TRecord TRec;
+ typedef TDatMerger<TRecord, TInput, TCompare, TSieve> TMerger;
+ typedef TSimpleSharedPtr<TMerger> TMergerPtr;
+ typedef TPowerMerger<TRecord, TCompare, TSieve, TInput, TOutput> TFileMerger;
+
+ struct TMergePortionTask: public IObjectInQueue {
+ TFileMerger* FileMerger;
+ int Begin;
+ int End;
+ TString OutFname;
+
+ TMergePortionTask(TFileMerger* fileMerger, int begin, int end, const TString& outFname)
+ : FileMerger(fileMerger)
+ , Begin(begin)
+ , End(end)
+ , OutFname(outFname)
+ {
+ }
+
+ void Process(void*) override {
+ THolder<TMergePortionTask> This(this);
+ //fprintf(stderr, "MergePortion: (%i, %i, %s)\n", Begin, End, ~OutFname);
+ FileMerger->MergePortion(Begin, End, OutFname);
+ }
+ };
+
+public:
+ TPowerMerger(const TSimpleSharedPtr<TThreadPool>& mtpQueue, const TSimpleSharedPtr<TPortionManager>& portMan,
+ int memory, int pageSize, bool autoUnlink)
+ : MtpQueue(mtpQueue)
+ , PortionManager(portMan)
+ , Memory(memory)
+ , PageSize(pageSize)
+ , AutoUnlink(autoUnlink)
+ {
+ }
+
+ TPowerMerger(const TSimpleSharedPtr<TThreadPool>& mtpQueue, const char* tempDir,
+ int memory, int pageSize, bool autoUnlink)
+ : MtpQueue(mtpQueue)
+ , PortionManager(new TPortionManager)
+ , Memory(memory)
+ , PageSize(pageSize)
+ , AutoUnlink(autoUnlink)
+ {
+ PortionManager->Open(tempDir);
+ }
+
+ ~TPowerMerger() {
+ Close();
+ }
+
+ void SetMtpQueue(const TSimpleSharedPtr<TThreadPool>& mtpQueue) {
+ MtpQueue = mtpQueue;
+ }
+
+ // Merges Files[begin..end) into a new file `outFname`; this is what TMergePortionTask runs.
+ // Afterwards, under Mutex: unlinks the merged inputs (subject to UnlinkFiles' rules), appends
+ // outFname to Files, decrements Tasks and signals TaskFinishedCond for MergeMT().
+ // NOTE(review): if anything above the final locked section throws, Tasks is never
+ // decremented and no signal is sent - confirm how MergeMT() is expected to recover.
+ void MergePortion(int begin, int end, const TString& outFname) {
+ TMerger merger;
+ InitMerger(merger, begin, end);
+
+ TOutput out("mergeportion-tmpout", PageSize, BufSize, 0);
+ out.Open(outFname.data());
+ const TRec* rec;
+ while ((rec = merger.Next()))
+ out.Push(rec);
+ out.Close();
+
+ merger.Close();
+
+ {
+ TGuard<TMutex> guard(Mutex);
+ UnlinkFiles(begin, end);
+ Files.push_back(outFname);
+ --Tasks;
+ TaskFinishedCond.Signal();
+ }
+ }
+
+ // Registers an input file for the upcoming Merge().
+ // Throws yexception once Merge() has started (InitialFilesEnd > 0); in that case
+ // the file list is left untouched, so a rejected call has no side effects.
+ void Add(const TString& fname) {
+     TGuard<TMutex> guard(Mutex);
+     // Validate before mutating: previously the name was appended first and stayed
+     // in Files even though the call failed.
+     if (InitialFilesEnd > 0)
+         ythrow yexception() << "TPowerMerger::Add: no more files allowed";
+     Files.push_back(fname);
+ }
+
+ void Merge(int maxPortions) {
+ TGuard<TMutex> guard(Mutex);
+ InitialFilesEnd = Files.ysize();
+ if (!InitialFilesEnd)
+ ythrow yexception() << "TPowerMerger::Merge: no files added";
+ Optimize(maxPortions);
+ MergeMT();
+ InitMerger(Merger, CPortions, Files.ysize());
+ }
+
+ void Close() {
+ TGuard<TMutex> guard(Mutex);
+ Merger.Close();
+ UnlinkFiles(CPortions, Files.ysize());
+ InitialFilesEnd = CPortions = 0;
+ Files.clear();
+ }
+
+ const TRec* Next() {
+ return Merger.Next();
+ }
+
+ const TRec* Current() {
+ return Merger.Current();
+ }
+
+ int FileCount() const {
+ TGuard<TMutex> guard(Mutex);
+ return Files.ysize();
+ }
+
+private:
+ void InitMerger(TMerger& merger, int begin, int end) {
+ TGuard<TMutex> guard(Mutex);
+ TVector<TSimpleSharedPtr<TInput>> inputs;
+ for (int i = begin; i < end; ++i) {
+ inputs.push_back(new TInput("mergeportion-tmpin", BufSize, 0));
+ inputs.back()->Open(Files[i]);
+ // fprintf(stderr, "InitMerger: %i, %s\n", i, ~Files[i]);
+ }
+ merger.Init(inputs);
+ }
+
+ // Removes Files[begin..end) from disk. Files added by the caller (index below
+ // InitialFilesEnd) are only removed when AutoUnlink is set; intermediate files always are.
+ void UnlinkFiles(int begin, int end) {
+     TGuard<TMutex> guard(Mutex);
+     for (int idx = begin; idx < end; ++idx) {
+         const bool isInitialFile = idx < InitialFilesEnd;
+         if (isInitialFile && !AutoUnlink)
+             continue;
+         unlink(Files[idx].c_str());
+     }
+ }
+
+ // Chooses the merge fan-in and the per-file buffer size.
+ //   MPortions - number of files merged by a regular task and the threshold below which
+ //               MergeMT() stops creating tasks;
+ //   FPortions - number of files merged by the very first task (see MergeMT());
+ //   BufSize   - Memory split over MPortions + 1 buffers, capped by maxBufSize.
+ // maxPortions is first limited by how many PageSize pages fit into Memory (minus one).
+ // NOTE(review): PageSize == 0 makes the first statement divide by zero.
+ void Optimize(int maxPortions, size_t maxBufSize = 4u << 20) {
+ TGuard<TMutex> guard(Mutex);
+ maxPortions = std::min(maxPortions, Memory / PageSize - 1);
+ maxBufSize = std::max((size_t)PageSize, maxBufSize);
+
+ // Not enough memory for more than a two-way merge: one page per buffer.
+ if (maxPortions <= 2) {
+ FPortions = MPortions = 2;
+ BufSize = PageSize;
+ return;
+ }
+
+ int Portions = Files.ysize();
+ if (maxPortions >= Portions) {
+ // Everything fits into a single merge.
+ FPortions = MPortions = Portions;
+ } else if (((Portions + maxPortions - 1) / maxPortions) <= maxPortions) {
+ // Shrink the fan-in while ceil(Portions / fan-in) still does not exceed it, then step
+ // back by one: the smallest fan-in for which that condition holds.
+ while (((Portions + maxPortions - 1) / maxPortions) <= maxPortions)
+ --maxPortions;
+ MPortions = ++maxPortions;
+ int total = ((Portions + MPortions - 1) / MPortions) + Portions;
+ FPortions = (total % MPortions) ? (total % MPortions) : (int)MPortions;
+ } else
+ FPortions = MPortions = maxPortions;
+
+ BufSize = std::min((size_t)(Memory / (MPortions + 1)), maxBufSize);
+ // fprintf(stderr, "Optimize: Portions=%i; MPortions=%i; FPortions=%i; Memory=%i; BufSize=%i\n",
+ // (int)Portions, (int)MPortions, (int)FPortions, (int)Memory, (int)BufSize);
+ }
+
+ // Schedules merge tasks on MtpQueue until at most MPortions unmerged files remain.
+ // CPortions is the index of the first file not yet handed to a task; every finished task
+ // appends its output to Files (see MergePortion), which may make further tasks necessary.
+ // The first task takes FPortions files, later ones MPortions.
+ // Throws yexception when a task cannot be queued.
+ // NOTE(review): Merge() calls this while already holding Mutex, so the lock is entered twice
+ // when Wait() runs; whether Wait() then releases it for the worker threads depends on
+ // TMutex/TCondVar semantics that are not visible in this file - confirm.
+ // NOTE(review): MtpQueue is dereferenced without a null check - confirm callers always set it
+ // before more than MPortions files are merged.
+ void MergeMT() {
+ TGuard<TMutex> guard(Mutex);
+ do {
+ int n;
+ while ((n = Files.ysize() - CPortions) > MPortions) {
+ int m = std::min((CPortions == 0 ? (int)FPortions : (int)MPortions), n);
+ TString fname = PortionManager->Next();
+ if (!MtpQueue->Add(new TMergePortionTask(this, CPortions, CPortions + m, fname)))
+ ythrow yexception() << "TPowerMerger::MergeMT: failed to add task";
+ CPortions += m;
+ ++Tasks;
+ }
+ // Sleep until some task reports completion, then re-check whether more work appeared.
+ if (Tasks > 0)
+ TaskFinishedCond.Wait(Mutex);
+ } while (Tasks > 0);
+ }
+
+private:
+ TMutex Mutex;
+ TCondVar TaskFinishedCond;
+
+ TMerger Merger;
+ TSimpleSharedPtr<TThreadPool> MtpQueue;
+ TSimpleSharedPtr<TPortionManager> PortionManager;
+ TVector<TString> Files;
+ int Tasks = 0;
+ int InitialFilesEnd = 0;
+ int CPortions = 0;
+ int MPortions = 0;
+ int FPortions = 0;
+ int Memory = 0;
+ int PageSize = 0;
+ int BufSize = 0;
+ bool AutoUnlink = false;
+};
+
+// A sorter powered by threads
+template <
+ class TRecord,
+ template <typename T> class TCompare,
+ class TSieve = TFakeSieve<TRecord>,
+ class TTmpInput = TInDatFile<TRecord>,
+ class TTmpOutput = TOutDatFile<TRecord>>
+class TPowerSorter {
+public:
+ typedef TPowerSorter<TRecord, TCompare, TSieve, TTmpInput, TTmpOutput> TSorter;
+ typedef TRecord TRec;
+ typedef TTmpOutput TTmpOut;
+ typedef TTmpInput TTmpIn;
+ typedef TDatSorterBuf<TRecord, TCompare, TSieve> TSorterBuf;
+ typedef TCompare<TRecord> TComp;
+ typedef TPowerMerger<TRecord, TCompare, TSieve, TTmpInput, TTmpOutput> TFileMerger;
+
+ struct TSortPortionTask: public IObjectInQueue {
+ TSorter* Sorter;
+ TSorterBuf* SorterBuf;
+ int Portion;
+
+ TSortPortionTask(TSorter* sorter, TSorterBuf* sorterBuf, int portion)
+ : Sorter(sorter)
+ , SorterBuf(sorterBuf)
+ , Portion(portion)
+ {
+ }
+
+ void Process(void*) override {
+ TAutoPtr<TSortPortionTask> This(this);
+ // fprintf(stderr, "SortPortion: %i\n", Portion);
+ Sorter->SortPortion(SorterBuf);
+ }
+ };
+
+ // Pool of sort buffers shared between the pushing thread and the sorting tasks.
+ // V owns every buffer ever created; Q lists the ones currently idle.
+ class TSorterBufQueue {
+ private:
+     TMutex Mutex;
+     TCondVar Cond;
+     TVector<TSimpleSharedPtr<TSorterBuf>> V;
+     TDeque<TSorterBuf*> Q;
+
+     int Memory;
+     int PageSize;
+     int MaxSorterBufs;
+
+ public:
+     TSorterBufQueue(int memory, int pageSize, int maxSorterBufs)
+         : Memory(memory)
+         , PageSize(pageSize)
+         , MaxSorterBufs(maxSorterBufs)
+     {
+     }
+
+     // Returns a buffer to the pool (after clearing it) and wakes one waiter.
+     void Push(TSorterBuf* sb) {
+         TGuard<TMutex> guard(Mutex);
+         sb->Clear();
+         Q.push_back(sb);
+         Cond.Signal();
+     }
+
+     // Hands out an idle buffer. While fewer than MaxSorterBufs buffers exist and none is
+     // idle a new one is created; otherwise the call blocks until a buffer is pushed back.
+     TSorterBuf* Pop() {
+         TGuard<TMutex> guard(Mutex);
+         if (Q.empty() && V.ysize() < MaxSorterBufs) {
+             V.push_back(new TSorterBuf(Memory / MaxSorterBufs, PageSize));
+             return V.back().Get();
+         }
+         while (Q.empty())
+             Cond.Wait(Mutex);
+         TSorterBuf* idle = Q.front();
+         Q.pop_front();
+         return idle;
+     }
+
+     // Forgets all buffers; the owning pointers in V are dropped.
+     void Clear() {
+         TGuard<TMutex> guard(Mutex);
+         Q.clear();
+         V.clear();
+     }
+
+     // Blocks until every created buffer is idle again.
+     void WaitAll() {
+         TGuard<TMutex> guard(Mutex);
+         while (V.size() > Q.size())
+             Cond.Wait(Mutex);
+     }
+
+     int GetMaxSorterBufs() const {
+         return MaxSorterBufs;
+     }
+ };
+
+public:
+ TPowerSorter(const TSimpleSharedPtr<TThreadPool>& mtpQueue, size_t maxSorterBufs,
+ const char* name, size_t memory, size_t pageSize, size_t bufSize)
+ : MaxSorterBufs(maxSorterBufs)
+ , Name(name)
+ , Memory(memory)
+ , PageSize(pageSize)
+ , BufSize(bufSize)
+ , MtpQueue(mtpQueue)
+ , PortionManager(new TPortionManager)
+ , SBQueue(Memory, PageSize, MaxSorterBufs)
+ , FileMerger(MtpQueue, PortionManager, Memory, PageSize, true)
+ {
+ }
+
+ TPowerSorter(size_t maxSorterBufs,
+ const char* name, size_t memory, size_t pageSize, size_t bufSize)
+ : MaxSorterBufs(maxSorterBufs)
+ , Name(name)
+ , Memory(memory)
+ , PageSize(pageSize)
+ , BufSize(bufSize)
+ , PortionManager(new TPortionManager)
+ , SBQueue(Memory, PageSize, maxSorterBufs)
+ , FileMerger(MtpQueue, PortionManager, Memory, PageSize, true)
+ {
+ }
+
+ TPowerSorter(const char* name, size_t memory, size_t pageSize, size_t bufSize)
+ : MaxSorterBufs(5)
+ , Name(name)
+ , Memory(memory)
+ , PageSize(pageSize)
+ , BufSize(bufSize)
+ , PortionManager(new TPortionManager)
+ , SBQueue(Memory, PageSize, MaxSorterBufs)
+ , FileMerger(MtpQueue, PortionManager, Memory, PageSize, true)
+ {
+ }
+
+ ~TPowerSorter() {
+ Close();
+ }
+
+ void Open(const char* tempDir) {
+ Close();
+ CurSB = SBQueue.Pop();
+ PortionManager->Open(tempDir);
+ }
+
+ void Reopen(const char* fname) {
+ Open(fname);
+ }
+
+ void Close() {
+ CurSB = nullptr;
+ SBQueue.Clear();
+ PortionCount = 0;
+ FileMerger.Close();
+ PortionManager->Close();
+ }
+
+ // Adds a record to the current sort buffer. When the buffer refuses it, the buffer is
+ // sent off as a portion and the record is retried once on a fresh buffer.
+ // Throws yexception when the sorter is not open.
+ const TRec* Push(const TRec* v) {
+     CheckOpen("Push");
+     const TRec* stored = CurSB->Push(v);
+     if (stored)
+         return stored;
+     NextPortion();
+     return CurSB->Push(v);
+ }
+
+ // Finishes the input phase. Throws yexception when the sorter is not open.
+ // If no portion was written, the single buffer is sorted in memory and served by Next().
+ // Otherwise the remaining records become the last portion, all buffers are awaited and
+ // released, and the portion files are merged (maxPortions is forwarded to FileMerger.Merge()).
+ void Sort(int maxPortions = 1000) {
+ CheckOpen("Sort");
+ if (!PortionCount) {
+ CurSB->Sort();
+ } else {
+ NextPortion();
+ // Return the current buffer so that WaitAll() can see every buffer as idle.
+ SBQueue.Push(CurSB);
+ CurSB = nullptr;
+ SBQueue.WaitAll();
+ SBQueue.Clear();
+ FileMerger.Merge(maxPortions);
+ }
+ }
+
+ const TRec* Next() {
+ return PortionCount ? FileMerger.Next() : CurSB->Next();
+ }
+
+ const TRec* Current() {
+ return PortionCount ? FileMerger.Current() : CurSB->Current();
+ }
+
+ int GetBufSize() const {
+ return BufSize;
+ }
+
+ int GetPageSize() const {
+ return PageSize;
+ }
+
+ const char* GetName() const {
+ return Name.data();
+ }
+
+private:
+ void CheckOpen(const char* m) {
+ if (!CurSB)
+ ythrow yexception() << "TPowerSorter::" << m << ": the sorter is not open";
+ }
+
+ // Turns the current buffer into a portion file and switches to another buffer.
+ // Does nothing when the current buffer is empty.
+ // With MaxSorterBufs <= 1 the portion is sorted and written synchronously; otherwise a task
+ // is queued on MtpQueue, which is created on first use with MaxSorterBufs - 1 threads
+ // and also handed to FileMerger.
+ // Throws yexception when the task cannot be queued.
+ void NextPortion() {
+ if (!CurSB->Size())
+ return;
+ ++PortionCount;
+ if (MaxSorterBufs <= 1) {
+ SortPortion(CurSB);
+ } else {
+ if (!MtpQueue.Get()) {
+ MtpQueue.Reset(new TThreadPool);
+ MtpQueue->Start(MaxSorterBufs - 1);
+ FileMerger.SetMtpQueue(MtpQueue);
+ }
+ if (!MtpQueue->Add(new TSortPortionTask(this, CurSB, PortionCount)))
+ ythrow yexception() << "TPowerSorter::NextPortion: failed to add task";
+ }
+ // May block until a buffer is pushed back by SortPortion().
+ CurSB = SBQueue.Pop();
+ }
+
+ // Sorts one buffer, writes it to a new portion file, registers the file with FileMerger
+ // and returns the buffer to SBQueue.
+ // On a yexception the portion file is unlinked and a new yexception prefixed with
+ // "SortPortion: " is thrown.
+ // NOTE(review): on that error path the buffer is not pushed back to SBQueue - confirm that
+ // Sort()'s WaitAll() cannot end up waiting for it.
+ void SortPortion(TSorterBuf* sorterBuf) {
+ TString portionFilename = PortionManager->Next();
+ try {
+ sorterBuf->Sort();
+
+ // fprintf(stderr, "TPowerSorter::SortPortion: -> %s\n", ~portionFilename);
+ TTmpOut out("powersorter-portion", PageSize, BufSize, 0);
+ out.Open(portionFilename.data());
+
+ while (sorterBuf->Next())
+ out.Push(sorterBuf->Current());
+
+ out.Close();
+ FileMerger.Add(portionFilename);
+ SBQueue.Push(sorterBuf);
+ } catch (const yexception& e) {
+ unlink(portionFilename.data());
+ ythrow yexception() << "SortPortion: " << e.what();
+ }
+ }
+
+private:
+ int MaxSorterBufs = 0;
+ TString Name;
+ int Memory = 0;
+ int PageSize = 0;
+ int BufSize = 0;
+
+ TMutex Mutex;
+ TSimpleSharedPtr<TThreadPool> MtpQueue;
+ TSimpleSharedPtr<TPortionManager> PortionManager;
+
+ TSorterBufQueue SBQueue;
+ TSorterBuf* CurSB = nullptr;
+ int PortionCount = 0;
+
+ TFileMerger FileMerger;
+};
diff --git a/library/cpp/microbdb/reader.h b/library/cpp/microbdb/reader.h
new file mode 100644
index 0000000000..694a2f1766
--- /dev/null
+++ b/library/cpp/microbdb/reader.h
@@ -0,0 +1,354 @@
+#pragma once
+
+#include "align.h"
+#include "header.h"
+#include "extinfo.h"
+
+#include <contrib/libs/zlib/zlib.h>
+#include <contrib/libs/fastlz/fastlz.h>
+#include <contrib/libs/snappy/snappy.h>
+
+#include <util/generic/vector.h>
+#include <util/memory/tempbuf.h>
+
+namespace NMicroBDB {
+ static const size_t DEFAULT_BUFFER_SIZE = (64 << 10);
+
+ //!
+ // Interface of a reader that walks the records of a page, one Next() call per record.
+ template <class TVal>
+ class IBasePageReader {
+ public:
+ // Size of the current record; the implementations in this file include the ext-length prefix.
+ virtual size_t GetRecSize() const = 0;
+ // Size of the ext-info payload of the current record.
+ virtual size_t GetExtSize() const = 0;
+ // Parses the ext-info of the current record into *extInfo; returns false on failure.
+ virtual bool GetExtInfo(typename TExtInfoType<TVal>::TResult* extInfo) const = 0;
+ // Returns the raw ext-info bytes of the current record and stores their length in *len.
+ virtual const ui8* GetExtInfoRaw(size_t* len) const = 0;
+ // Advances to the next record and returns it.
+ virtual const TVal* Next() = 0;
+ // Forgets the current position so that the next Next() starts from the beginning of a page.
+ virtual void Reset() = 0;
+ //! Sets the clearing flag, so that temporary buffers are released
+ //! by the next call of Next(). The default implementation does nothing.
+ virtual void SetClearFlag() {
+ }
+
+ virtual ~IBasePageReader() {
+ }
+ };
+
+ // Reader for uncompressed pages: records are read in place from the page
+ // currently exposed by the page iterator.
+ template <class TVal, typename TPageIter>
+ class TRawPageReader: public IBasePageReader<TVal> {
+ public:
+     TRawPageReader(TPageIter* const iter)
+         : PageIter(iter)
+     {
+         Reset();
+     }
+
+     // Parses the ext-info that follows the current record; false when there is no current record.
+     bool GetExtInfo(typename TExtInfoType<TVal>::TResult* extInfo) const override {
+         Y_VERIFY(TExtInfoType<TVal>::Exists, "GetExtInfo should only be used with extended records");
+         if (Rec == nullptr)
+             return false;
+         return extInfo->ParseFromArray((ui8*)Rec + RecSize + ExtLenSize, ExtSize);
+     }
+
+     size_t GetRecSize() const override {
+         return RecSize + ExtLenSize;
+     }
+
+     size_t GetExtSize() const override {
+         return ExtSize;
+     }
+
+     // Raw bytes after the record body (length prefix plus payload); nullptr and *len == 0
+     // when there is no current record.
+     const ui8* GetExtInfoRaw(size_t* len) const override {
+         Y_VERIFY(TExtInfoType<TVal>::Exists, "GetExtInfo should only be used with extended records");
+         if (Rec) {
+             *len = ExtLenSize + ExtSize;
+             return (ui8*)Rec + RecSize;
+         }
+         *len = 0;
+         return nullptr;
+     }
+
+     // Moves to the first record after the page header, or past the current record
+     // (sizes rounded up with DatCeil), and refreshes the cached sizes.
+     const TVal* Next() override {
+         const char* where = Rec
+                                 ? (const char*)Rec + DatCeil(RecSize + ExtLenSize + ExtSize)
+                                 : (const char*)PageIter->Current() + sizeof(TDatPage);
+         Rec = (const TVal*)where;
+         if (TExtInfoType<TVal>::Exists)
+             RecSize = SizeOfExt(Rec, &ExtLenSize, &ExtSize);
+         else
+             RecSize = SizeOf(Rec);
+         return Rec;
+     }
+
+     void Reset() override {
+         ExtSize = 0;
+         ExtLenSize = 0;
+         RecSize = 0;
+         Rec = nullptr;
+     }
+
+ private:
+     const TVal* Rec;
+     size_t RecSize;
+     size_t ExtLenSize;
+     size_t ExtSize;
+     TPageIter* const PageIter;
+ };
+
+ template <class TVal, typename TPageIter>
+ class TCompressedReader: public IBasePageReader<TVal> {
+ inline size_t GetFirstRecordSize(const TVal* const in) const {
+ if (!TExtInfoType<TVal>::Exists) {
+ return DatCeil(SizeOf(in));
+ } else {
+ size_t ll;
+ size_t l;
+ size_t ret = SizeOfExt(in, &ll, &l);
+
+ return DatCeil(ret + ll + l);
+ }
+ }
+
+ // Decodes the block that starts at Page into Buffer and advances Page past it.
+ // A block is a TCompressedHeader, the first record stored as is, and (when the header
+ // counts more than one record) the remaining records compressed with Algo.
+ // When the page iterator is frozen the previous buffer is parked in Blocks instead of being
+ // reused, so records already handed out stay valid until ClearBuffer().
+ // Throws yexception when decompression fails or Algo is not a supported algorithm.
+ void DecompressBlock() {
+     if (PageIter->IsFrozen() && Buffer.Get())
+         Blocks.push_back(Buffer.Release());
+
+     const TCompressedHeader* hdr = (const TCompressedHeader*)(Page);
+
+     Page += sizeof(TCompressedHeader);
+
+     const size_t first = GetFirstRecordSize((const TVal*)Page);
+
+     if (!Buffer.Get() || Buffer->Size() < hdr->Original)
+         Buffer.Reset(new TTempBuf(Max<size_t>(hdr->Original, DEFAULT_BUFFER_SIZE)));
+
+     memcpy(Buffer->Data(), Page, first);
+     Page += first;
+
+     if (hdr->Count > 1) {
+         switch (Algo) {
+             case MBDB_COMPRESSION_ZLIB: {
+                 uLongf dst = hdr->Original - first;
+
+                 int ret = uncompress((Bytef*)Buffer->Data() + first, &dst, Page, hdr->Compressed);
+
+                 if (ret != Z_OK)
+                     ythrow yexception() << "error then uncompress " << ret;
+             } break;
+             case MBDB_COMPRESSION_FASTLZ: {
+                 int dst = hdr->Original - first;
+                 int ret = yfastlz_decompress(Page, hdr->Compressed, Buffer->Data() + first, dst);
+
+                 if (!ret)
+                     ythrow yexception() << "error then uncompress";
+             } break;
+             case MBDB_COMPRESSION_SNAPPY: {
+                 // RawUncompress takes no output limit, so check the declared length against
+                 // the space left in the buffer first (the other branches pass that limit in).
+                 size_t unpacked = 0;
+                 if (!snappy::GetUncompressedLength((const char*)Page, hdr->Compressed, &unpacked) ||
+                     unpacked > hdr->Original - first ||
+                     !snappy::RawUncompress((const char*)Page, hdr->Compressed, Buffer->Data() + first))
+                     ythrow yexception() << "error then uncompress";
+             } break;
+             default:
+                 // Without this the rest of the buffer stayed undecoded and was served as records.
+                 ythrow yexception() << "unknown compression algorithm " << (int)Algo;
+         }
+     }
+
+     Rec = nullptr;
+     RecNum = hdr->Count;
+     Page += hdr->Compressed;
+ }
+
+ void ClearBuffer() {
+ for (size_t i = 0; i < Blocks.size(); ++i)
+ delete Blocks[i];
+ Blocks.clear();
+ ClearFlag = false;
+ }
+
+ public:
+ TCompressedReader(TPageIter* const iter)
+ : Rec(nullptr)
+ , RecSize(0)
+ , ExtLenSize(0)
+ , ExtSize(0)
+ , Page(nullptr)
+ , PageIter(iter)
+ , RecNum(0)
+ , BlockNum(0)
+ , ClearFlag(false)
+ {
+ }
+
+ ~TCompressedReader() override {
+ ClearBuffer();
+ }
+
+ size_t GetRecSize() const override {
+ return RecSize + ExtLenSize;
+ }
+
+ size_t GetExtSize() const override {
+ return ExtSize;
+ }
+
+ bool GetExtInfo(typename TExtInfoType<TVal>::TResult* extInfo) const override {
+ Y_VERIFY(TExtInfoType<TVal>::Exists, "GetExtInfo should only be used with extended records");
+ if (!Rec)
+ return false;
+ ui8* raw = (ui8*)Rec + RecSize + ExtLenSize;
+ return extInfo->ParseFromArray(raw, ExtSize);
+ }
+
+ const ui8* GetExtInfoRaw(size_t* len) const override {
+ Y_VERIFY(TExtInfoType<TVal>::Exists, "GetExtInfo should only be used with extended records");
+ if (!Rec) {
+ *len = 0;
+ return nullptr;
+ }
+ *len = ExtLenSize + ExtSize;
+ return (ui8*)Rec + RecSize;
+ }
+
+ // Returns the next record of the current page, or nullptr when the page iterator has no
+ // current page or all blocks of the page are exhausted.
+ // Records are served from Buffer, which DecompressBlock() fills one block at a time.
+ const TVal* Next() override {
+ Y_ASSERT(RecNum >= 0);
+
+ // Deferred release requested through SetClearFlag().
+ if (ClearFlag)
+ ClearBuffer();
+
+ // First call after Reset(): read the page-level header and decode the first block.
+ if (!Page) {
+ if (!PageIter->Current())
+ return nullptr;
+
+ Page = (ui8*)PageIter->Current() + sizeof(TDatPage);
+
+ // BlockNum counts the blocks still to be decoded after the one decoded right below.
+ BlockNum = ((TCompressedPage*)Page)->BlockCount - 1;
+ Algo = (ECompressionAlgorithm)((TCompressedPage*)Page)->Algorithm;
+ Page += sizeof(TCompressedPage);
+
+ DecompressBlock();
+ }
+
+ // Current block exhausted: move on to the next one, if any.
+ if (!RecNum) {
+ if (BlockNum <= 0)
+ return nullptr;
+ else {
+ --BlockNum;
+ DecompressBlock();
+ }
+ }
+
+ --RecNum;
+ // DecompressBlock() resets Rec to nullptr, so the first record of a block starts at Buffer.
+ if (!Rec)
+ Rec = (const TVal*)Buffer->Data();
+ else
+ Rec = (const TVal*)((char*)Rec + DatCeil(RecSize + ExtLenSize + ExtSize));
+
+ if (!TExtInfoType<TVal>::Exists)
+ RecSize = SizeOf(Rec);
+ else
+ RecSize = SizeOfExt(Rec, &ExtLenSize, &ExtSize);
+
+ return Rec;
+ }
+
+ void Reset() override {
+ Page = nullptr;
+ BlockNum = 0;
+ Rec = nullptr;
+ RecSize = 0;
+ ExtLenSize = 0;
+ ExtSize = 0;
+ RecNum = 0;
+ }
+
+ void SetClearFlag() override {
+ ClearFlag = true;
+ }
+
+ public:
+ THolder<TTempBuf> Buffer;
+ TVector<TTempBuf*> Blocks;
+ const TVal* Rec;
+ size_t RecSize;
+ size_t ExtLenSize;
+ size_t ExtSize;
+ const ui8* Page;
+ TPageIter* const PageIter;
+ int RecNum; //!< count of recs in current block
+ int BlockNum;
+ ECompressionAlgorithm Algo;
+ bool ClearFlag;
+ };
+
+ // zlib-backed compressor policy.
+ class TZLibCompressionImpl {
+ public:
+     static const ECompressionAlgorithm Code = MBDB_COMPRESSION_ZLIB;
+
+     // No per-instance state to set up.
+     inline void Init() {
+     }
+
+     // Nothing to release.
+     inline void Term() {
+     }
+
+     // Upper bound of the compressed size for `size` input bytes, as reported by zlib.
+     inline size_t CompressBound(size_t size) const noexcept {
+         return ::compressBound(size);
+     }
+
+     // Compresses `in` into `out`. On entry outSize is the capacity of `out`, on return the
+     // number of bytes produced. Throws yexception when zlib does not report Z_OK.
+     inline void Compress(void* out, size_t& outSize, const void* in, size_t inSize) {
+         uLongf produced = outSize;
+         const int rc = compress((Bytef*)out, &produced, (const Bytef*)in, inSize);
+         if (rc != Z_OK)
+             ythrow yexception() << "not compressed";
+         outSize = produced;
+     }
+ };
+
+ // FastLZ-backed compressor policy.
+ class TFastlzCompressionImpl {
+ public:
+     static const ECompressionAlgorithm Code = MBDB_COMPRESSION_FASTLZ;
+
+     // No per-instance state to set up.
+     inline void Init() {
+     }
+
+     // Nothing to release.
+     inline void Term() {
+     }
+
+     // Output buffer size to reserve for `size` input bytes: 107% of the input, at least 66 bytes.
+     inline size_t CompressBound(size_t size) const noexcept {
+         const size_t estimate = size_t(size * 1.07);
+         if (estimate < 66)
+             return 66;
+         return estimate;
+     }
+
+     // Compresses `in` into `out` at level 2 and stores the produced size in outSize.
+     // Throws yexception when the compressor returns 0.
+     inline void Compress(void* out, size_t& outSize, const void* in, size_t inSize) {
+         outSize = yfastlz_compress_level(2, in, inSize, out);
+         if (outSize == 0)
+             ythrow yexception() << "not compressed";
+     }
+ };
+
+ // Snappy-backed compressor policy.
+ class TSnappyCompressionImpl {
+ public:
+     static const ECompressionAlgorithm Code = MBDB_COMPRESSION_SNAPPY;
+
+     // No per-instance state to set up.
+     inline void Init() {
+     }
+
+     // Nothing to release.
+     inline void Term() {
+     }
+
+     // Upper bound of the compressed size for `size` input bytes, as reported by snappy.
+     inline size_t CompressBound(size_t size) const noexcept {
+         return snappy::MaxCompressedLength(size);
+     }
+
+     // Compresses `in` into `out`; snappy stores the produced size in outSize.
+     inline void Compress(void* out, size_t& outSize, const void* in, size_t inSize) {
+         const char* source = (const char*)in;
+         char* target = (char*)out;
+         snappy::RawCompress(source, inSize, target, &outSize);
+     }
+ };
+
+}
+
+using TFakeCompression = void;
+using TZLibCompression = NMicroBDB::TZLibCompressionImpl;
+using TFastlzCompression = NMicroBDB::TFastlzCompressionImpl;
+using TSnappyCompression = NMicroBDB::TSnappyCompressionImpl;
diff --git a/library/cpp/microbdb/safeopen.h b/library/cpp/microbdb/safeopen.h
new file mode 100644
index 0000000000..c328ffd575
--- /dev/null
+++ b/library/cpp/microbdb/safeopen.h
@@ -0,0 +1,792 @@
+#pragma once
+
+// util
+#include <util/generic/yexception.h>
+#include <util/generic/vector.h>
+#include <util/string/util.h>
+#include <util/system/mutex.h>
+#include <thread>
+
+#include "microbdb.h"
+
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable : 4706) /*assignment within conditional expression*/
+#pragma warning(disable : 4267) /*conversion from 'size_t' to 'type', possible loss of data*/
+#endif
+
+template <typename TVal, typename TPageFile = TInputPageFile, typename TIterator = TInputPageIterator<TPageFile>>
+class TInDatFile: protected TInDatFileImpl<TVal, TInputRecordIterator<TVal, TIterator>> {
+public:
+ typedef TVal TRec;
+ typedef TInDatFileImpl<TVal, TInputRecordIterator<TVal, TIterator>> TBase;
+
+ TInDatFile(const TString& name, size_t pages, int pagesOrBytes = 1)
+ : Name(name)
+ , Pages(pages)
+ , PagesOrBytes(pagesOrBytes)
+ {
+ }
+
+ ~TInDatFile() {
+ Close();
+ }
+
+ void Open(const TString& fname, bool direct = false) {
+ ui32 gotRecordSig = 0;
+ int ret = TBase::Open(fname.data(), Pages, PagesOrBytes, &gotRecordSig, direct);
+ if (ret) {
+ // XXX: print record type name, not type sig
+ ythrow yexception() << ErrorMessage(ret, "Failed to open input file", fname, TVal::RecordSig, gotRecordSig);
+ }
+ Name = fname;
+ }
+
+ void OpenStream(TAutoPtr<IInputStream> input) {
+ ui32 gotRecordSig = 0;
+ int ret = TBase::Open(input, Pages, PagesOrBytes, &gotRecordSig);
+ if (ret) {
+ // XXX: print record type name, not type sig
+ ythrow yexception() << ErrorMessage(ret, "Failed to open input file", Name, TVal::RecordSig, gotRecordSig);
+ }
+ }
+
+ void Close() {
+ int ret;
+ if (IsOpen() && (ret = TBase::GetError()))
+ if (!std::uncaught_exception())
+ ythrow yexception() << ErrorMessage(ret, "Error before closing input file", Name);
+ if ((ret = TBase::Close()))
+ if (!std::uncaught_exception())
+ ythrow yexception() << ErrorMessage(ret, "Error while closing input file", Name);
+ }
+
+ const char* GetName() const {
+ return Name.data();
+ }
+
+ using TBase::Current;
+ using TBase::Freeze;
+ using TBase::GetError;
+ using TBase::GetExtInfo;
+ using TBase::GetExtInfoRaw;
+ using TBase::GetExtSize;
+ using TBase::GetLastPage;
+ using TBase::GetPageNum;
+ using TBase::GetPageSize;
+ using TBase::GetRecSize;
+ using TBase::GotoLastPage;
+ using TBase::GotoPage;
+ using TBase::IsEof;
+ using TBase::IsOpen;
+ using TBase::Next;
+ using TBase::Skip;
+ using TBase::Unfreeze;
+
+protected:
+ TString Name;
+ size_t Pages;
+ int PagesOrBytes;
+};
+
+// Input dat-file wrapper over the mapped page file/iterator pair. Integer
+// error codes of the base class are converted into yexception throws.
+//
+// The `pages` / `pagesOrBytes` constructor arguments are accepted but ignored.
+template <typename TVal>
+class TMappedInDatFile: protected TInDatFileImpl<TVal, TInputRecordIterator<TVal, TMappedInputPageIterator<TMappedInputPageFile>>> {
+public:
+    typedef TVal TRec;
+    typedef TInDatFileImpl<TVal, TInputRecordIterator<TVal, TMappedInputPageIterator<TMappedInputPageFile>>> TBase;
+
+    TMappedInDatFile(const TString& name, size_t /* pages */, int /* pagesOrBytes */)
+        : Name(name)
+    {
+    }
+
+    ~TMappedInDatFile() {
+        Close();
+    }
+
+    // Opens `fname`; throws yexception on failure, otherwise remembers the name.
+    void Open(const TString& fname) {
+        const int err = TBase::Open(fname.data());
+        if (err != 0) {
+            ythrow yexception() << ErrorMessage(err, "Failed to open mapped file", fname, TVal::RecordSig);
+        }
+        Name = fname;
+    }
+
+    // Reports a pending error (only when the file is open), then closes the
+    // file. Errors are thrown only if no exception is currently in flight.
+    void Close() {
+        if (IsOpen()) {
+            const int pending = TBase::GetError();
+            if (pending && !std::uncaught_exception()) {
+                ythrow yexception() << ErrorMessage(pending, "Error before closing mapped file", Name);
+            }
+        }
+
+        const int closeErr = TBase::Close();
+        if (closeErr && !std::uncaught_exception()) {
+            ythrow yexception() << ErrorMessage(closeErr, "Error while closing mapped file", Name);
+        }
+    }
+
+    const char* GetName() const {
+        return Name.data();
+    }
+
+    using TBase::Current;
+    using TBase::GetError;
+    using TBase::GetExtInfo;
+    using TBase::GetExtInfoRaw;
+    using TBase::GetLastPage;
+    using TBase::GetPageNum;
+    using TBase::GetPageSize;
+    using TBase::GotoLastPage;
+    using TBase::GotoPage;
+    using TBase::IsEof;
+    using TBase::IsOpen;
+    using TBase::Next;
+    using TBase::Skip;
+
+protected:
+    TString Name;
+};
+
+template <typename TVal, typename TCompressor = TFakeCompression, typename TPageFile = TOutputPageFile>
+class TOutDatFile: protected TOutDatFileImpl<TVal, TOutputRecordIterator<TVal, TOutputPageIterator<TPageFile>, TFakeIndexer, TCompressor>> {
+public:
+ typedef TOutDatFileImpl<TVal, TOutputRecordIterator<TVal, TOutputPageIterator<TPageFile>, TFakeIndexer, TCompressor>> TBase;
+
+ TOutDatFile(const TString& name, size_t pagesize, size_t pages, int pagesOrBytes = 1)
+ : Name(name)
+ , PageSize(pagesize)
+ , Pages(pages)
+ , PagesOrBytes(pagesOrBytes)
+ {
+ }
+
+ ~TOutDatFile() {
+ Close();
+ }
+
+ void Open(const char* fname, bool direct = false) {
+ int ret = TBase::Open(fname, PageSize, Pages, PagesOrBytes, direct);
+ if (ret)
+ ythrow yexception() << ErrorMessage(ret, "Failed to open output file", fname);
+ Name = fname;
+ }
+
+ void Open(const TString& fname) {
+ Open(fname.data());
+ }
+
+ void OpenStream(TAutoPtr<IOutputStream> output) {
+ int ret = TBase::Open(output, PageSize, Pages, PagesOrBytes);
+ if (ret)
+ ythrow yexception() << ErrorMessage(ret, "Failed to open output stream", Name);
+ }
+
+ void Close() {
+ int ret;
+ if ((ret = TBase::GetError()))
+ if (!std::uncaught_exception())
+ ythrow yexception() << ErrorMessage(ret, "Error before closing output file", Name);
+ if ((ret = TBase::Close()))
+ if (!std::uncaught_exception())
+ ythrow yexception() << ErrorMessage(ret, "Error while closing output file", Name);
+ }
+
+ const char* GetName() const {
+ return Name.data();
+ }
+
+ using TBase::Freeze;
+ using TBase::GetError;
+ using TBase::GetPageSize;
+ using TBase::IsEof;
+ using TBase::IsOpen;
+ using TBase::Offset;
+ using TBase::Push;
+ using TBase::PushWithExtInfo;
+ using TBase::Reserve;
+ using TBase::Unfreeze;
+
+protected:
+ TString Name;
+ size_t PageSize, Pages;
+ int PagesOrBytes;
+};
+
+template <typename TVal, typename TCompressor, typename TPageFile>
+class TOutDatFileArray;
+
+// Fixed-size array of TOutDatFile objects that are opened and closed together.
+// The objects live in a malloc'ed buffer and are constructed with placement
+// new, so the destructor destroys them explicitly before freeing the buffer.
+template <typename TVal, typename TCompressor = TFakeCompression, typename TPageFile = TOutputPageFile>
+class TOutDatFileArray {
+    typedef TOutDatFile<TVal, TCompressor, TPageFile> TFileType;
+
+public:
+    // The page parameters are passed to every file created by Open().
+    TOutDatFileArray(const TString& name, size_t pagesize, size_t pages, int pagesOrBytes = 1)
+        : Name(name)
+        , PageSize(pagesize)
+        , Pages(pages)
+        , PagesOrBytes(pagesOrBytes)
+        , NumFiles(0)
+        , Files(nullptr)
+    {
+    }
+
+    ~TOutDatFileArray() {
+        for (int i = 0; i < NumFiles; ++i) {
+            Files[i].Close();
+            Files[i].~TFileType();
+        }
+        free(Files);
+        Files = nullptr;
+        NumFiles = 0;
+    }
+
+    // Unchecked access to the file at index `pos`.
+    TFileType& operator[](size_t pos) {
+        return Files[pos];
+    }
+
+    // Creates `n` files and opens them. `fname` is used as a printf-style
+    // pattern that receives the file index as its only argument.
+    // If opening one file throws, the files opened so far are closed and the
+    // exception is rethrown.
+    void Open(int n, const TString& fname) {
+        char temp[FILENAME_MAX];
+
+        Name = fname;
+        NumFiles = CreateDatObjects(n, fname);
+
+        int i;
+        try {
+            for (i = 0; i < NumFiles; ++i) {
+                // Bounded formatting: an over-long expansion is truncated
+                // instead of overrunning `temp`.
+                snprintf(temp, sizeof(temp), fname.data(), i);
+                Files[i].Open(temp);
+            }
+        } catch (...) {
+            while (--i >= 0)
+                Files[i].Close();
+            throw;
+        }
+    }
+
+    // Creates `n` files and opens file i under the name builder.GetName(i).
+    template <typename TNameBuilder>
+    void OpenWithCallback(int n, const TNameBuilder& builder) {
+        NumFiles = CreateDatObjects(n, Name);
+
+        for (int i = 0; i < NumFiles; ++i)
+            Files[i].Open(builder.GetName(i).data());
+    }
+
+    // Closes every file sequentially.
+    void Close() {
+        for (int i = 0; i < NumFiles; ++i)
+            Files[i].Close();
+    }
+
+    // Closes the files using `threads` worker threads. Each worker repeatedly
+    // takes the next unclaimed index under the mutex and closes that file.
+    void CloseMT(ui32 threads) {
+        int current = 0;
+        TMutex mutex;
+        TVector<std::thread> thrs;
+        thrs.reserve(threads);
+        for (ui32 i = 0; i < threads; i++) {
+            thrs.emplace_back([this, &current, &mutex]() {
+                while (true) {
+                    mutex.Acquire();
+                    int cur = current++;
+                    mutex.Release();
+                    if (cur >= NumFiles)
+                        break;
+                    Files[cur].Close();
+                }
+            });
+        }
+        for (auto& thread : thrs) {
+            thread.join();
+        }
+    }
+
+    const char* GetName() const {
+        return Name.data();
+    }
+
+protected:
+    // Allocates storage for `n` files and constructs them in place, naming
+    // file i "<fname>[i]". Returns the number of constructed objects.
+    // Throws yexception if the allocation fails.
+    int CreateDatObjects(int n, const TString& fname) {
+        if (!(Files = (TFileType*)malloc(n * sizeof(TFileType))))
+            ythrow yexception() << "can't alloc \"" << fname << "\" file array: " << LastSystemErrorText();
+        int num = 0;
+        char temp[FILENAME_MAX];
+        for (int i = 0; i < n; ++i, ++num) {
+            // Bounded formatting: a long `fname` is truncated instead of
+            // overrunning `temp`.
+            snprintf(temp, sizeof(temp), "%s[%d]", fname.data(), i);
+            new (Files + i) TFileType(temp, PageSize, Pages, PagesOrBytes);
+        }
+        return num;
+    }
+
+    TString Name;
+    size_t PageSize, Pages;
+    int PagesOrBytes, NumFiles;
+    TFileType* Files;
+};
+
+template <typename TVal, typename TKey, typename TCompressor = TFakeCompression, typename TPageFile = TOutputPageFile>
+class TOutDirectFile: protected TOutDirectFileImpl<TVal, TKey, TCompressor, TPageFile> {
+ typedef TOutDirectFileImpl<TVal, TKey, TCompressor, TPageFile> TBase;
+
+public:
+ TOutDirectFile(const TString& name, size_t pagesize, size_t pages, size_t ipagesize, size_t ipages, int pagesOrBytes)
+ : Name(name)
+ , PageSize(pagesize)
+ , Pages(pages)
+ , IdxPageSize(ipagesize)
+ , IdxPages(ipages)
+ , PagesOrBytes(pagesOrBytes)
+ {
+ }
+
+ ~TOutDirectFile() {
+ Close();
+ }
+
+ void Open(const TString& fname) {
+ int ret = TBase::Open(fname.data(), PageSize, Pages, IdxPageSize, IdxPages, PagesOrBytes);
+ if (ret)
+ ythrow yexception() << ErrorMessage(ret, "Failed to open output file", fname);
+ Name = fname;
+ }
+
+ void Close() {
+ int ret;
+ if ((ret = TBase::GetError()))
+ if (!std::uncaught_exception())
+ ythrow yexception() << ErrorMessage(ret, "Error before closing output file", Name);
+ if ((ret = TBase::Close()))
+ if (!std::uncaught_exception())
+ ythrow yexception() << ErrorMessage(ret, "Error while closing output file", Name);
+ }
+
+ const char* GetName() const {
+ return Name.data();
+ }
+
+ using TBase::Freeze;
+ using TBase::Push;
+ using TBase::PushWithExtInfo;
+ using TBase::Reserve;
+ using TBase::Unfreeze;
+
+protected:
+ TString Name;
+ size_t PageSize, Pages, IdxPageSize, IdxPages;
+ int PagesOrBytes;
+};
+
+// Exception-throwing front end for TDatSorterImpl. It owns the temp-file
+// template (Templ) produced by MakeSorterTempl() and removes the directory
+// part of that template when the sorter is closed.
+//
+// Close() (also run by the destructor) throws on error unless another
+// exception is already propagating.
+template <
+    typename TVal,
+    template <typename T> class TComparer,
+    typename TCompress = TFakeCompression,
+    typename TSieve = TFakeSieve<TVal>,
+    typename TPageFile = TOutputPageFile,
+    typename TFileTypes = TDefInterFileTypes>
+class TDatSorter: protected TDatSorterImpl<TVal, TComparer<TVal>, TCompress, TSieve, TPageFile, TFileTypes> {
+    typedef TDatSorterImpl<TVal, TComparer<TVal>, TCompress, TSieve, TPageFile, TFileTypes> TBase;
+
+public:
+    typedef TVal TRec;
+
+public:
+    // `name` prefixes every error message; `memory` is passed to the base
+    // class Sort*/SortTo* calls; the page parameters are passed to TBase::Open().
+    TDatSorter(const TString& name, size_t memory, size_t pagesize, size_t pages, int pagesOrBytes = 1)
+        : Name(name)
+        , Memory(memory)
+        , PageSize(pagesize)
+        , Pages(pages)
+        , PagesOrBytes(pagesOrBytes)
+    {
+        Templ[0] = 0;
+    }
+
+    ~TDatSorter() {
+        Close();
+        Templ[0] = 0;
+    }
+
+    // Builds the temp-file template under `dirName` and opens the base sorter
+    // with it. Throws yexception if either step reports an error.
+    void Open(const TString& dirName) {
+        int ret;
+        if ((ret = MakeSorterTempl(Templ, dirName.data()))) {
+            Templ[0] = 0;
+            ythrow yexception() << ErrorMessage(ret, Name + " sorter: bad tempdir", dirName);
+        }
+        if ((ret = TBase::Open(Templ, PageSize, Pages, PagesOrBytes)))
+            ythrow yexception() << ErrorMessage(ret, Name + " sorter: open error, temp dir", Templ);
+    }
+
+    // Runs the base sort with the configured memory and at most 1000 portions;
+    // throws yexception on error.
+    void Sort(bool direct = false) {
+        int ret = TBase::Sort(Memory, 1000, direct);
+        if (ret)
+            ythrow yexception() << ErrorMessage(ret, Name + " sorter: sort error, temp dir", Templ, TVal::RecordSig);
+    }
+
+    // Sorts straight into the file `name`; throws yexception on error.
+    void SortToFile(const TString& name) {
+        int ret = TBase::SortToFile(name.data(), Memory);
+        if (ret)
+            ythrow yexception() << ErrorMessage(ret, Name + "sorter: error in SortToFile", name, TVal::RecordSig);
+    }
+
+    // Sorts straight into `output`; throws yexception on error.
+    void SortToStream(TAutoPtr<IOutputStream> output) {
+        int ret = TBase::SortToStream(output, Memory);
+        if (ret)
+            ythrow yexception() << ErrorMessage(ret, Name + "sorter: error in SortToStream", "", TVal::RecordSig);
+    }
+
+    // Closes the base sorter and removes the temp directory, then reports the
+    // error that was pending before the close or the one produced by it.
+    // Errors are thrown only if no exception is currently in flight.
+    void Close() {
+        int ret1 = TBase::GetError();
+        int ret2 = TBase::Close();
+        if (Templ[0]) {
+            // Everything before the last directory separator is the directory
+            // to remove. If the template has no separator, nothing is removed
+            // rather than writing through a null pointer.
+            if (char* sep = strrchr(Templ, GetDirectorySeparator())) {
+                *sep = 0;
+                RemoveDirWithContents(Templ);
+            }
+            Templ[0] = 0;
+        }
+        if (ret1)
+            if (!std::uncaught_exception())
+                ythrow yexception() << ErrorMessage(ret1, Name + "sorter: error before closing");
+        if (ret2)
+            if (!std::uncaught_exception())
+                ythrow yexception() << ErrorMessage(ret2, Name + "sorter: error while closing");
+    }
+
+    // Non-throwing variant: forwards to the base sort and returns its error code.
+    int Sort(size_t memory, int maxportions, bool direct = false) {
+        return TBase::Sort(memory, maxportions, direct);
+    }
+
+    const char* GetName() const {
+        return Name.data();
+    }
+
+    using TBase::GetPageSize;
+    using TBase::GetPages;
+    using TBase::Next;
+    using TBase::NextPortion;
+    using TBase::Push;
+    using TBase::PushWithExtInfo;
+    using TBase::UseSegmentSorter;
+
+protected:
+    TString Name;
+    size_t Memory, PageSize, Pages;
+    int PagesOrBytes;
+    char Templ[FILENAME_MAX];
+};
+
+// Fixed-size array of sorters that share one configuration and one temp
+// directory. The sorters live in a malloc'ed buffer and are constructed with
+// placement new, so the destructor destroys them explicitly before freeing
+// the buffer.
+template <typename TSorter>
+class TSorterArray {
+public:
+    typedef TSorter TDatSorter;
+
+public:
+    // The arguments are stored and passed to every sorter created by Open().
+    TSorterArray(const TString& name, size_t memory, size_t pagesize, size_t pages, int pagesOrBytes = 1)
+        : Name(name)
+        , Memory(memory)
+        , PageSize(pagesize)
+        , Pages(pages)
+        , PagesOrBytes(pagesOrBytes)
+        , NumSorters(0)
+        , Sorters(nullptr)
+    {
+    }
+
+    ~TSorterArray() {
+        for (int i = 0; i < NumSorters; ++i) {
+            Sorters[i].Close();
+            Sorters[i].~TSorter();
+        }
+        free(Sorters);
+        Sorters = nullptr;
+        NumSorters = 0;
+    }
+
+    // Unchecked access to the sorter at index `pos`.
+    TSorter& operator[](size_t pos) {
+        return Sorters[pos];
+    }
+
+    // Creates `n` sorters named "<Name>[i]" and opens each of them on `fname`.
+    // A non-zero `memory` replaces the memory value given to the constructor.
+    // Throws yexception if the array cannot be allocated.
+    void Open(int n, const TString& fname, size_t memory = 0) {
+        if (!(Sorters = (TSorter*)malloc(n * sizeof(TSorter))))
+            ythrow yexception() << "can't alloc \"" << fname << "\" sorter array: " << LastSystemErrorText();
+        NumSorters = n;
+        char temp[FILENAME_MAX];
+        if (memory)
+            Memory = memory;
+        for (int i = 0; i < NumSorters; ++i) {
+            // Bounded formatting: a long Name is truncated instead of
+            // overrunning `temp`.
+            snprintf(temp, sizeof(temp), "%s[%d]", Name.data(), i);
+            new (Sorters + i) TSorter(temp, Memory, PageSize, Pages, PagesOrBytes);
+        }
+        for (int i = 0; i < NumSorters; ++i)
+            Sorters[i].Open(fname);
+    }
+
+    // Closes every sorter sequentially.
+    void Close() {
+        for (int i = 0; i < NumSorters; ++i)
+            Sorters[i].Close();
+    }
+
+    const char* GetName() const {
+        return Name.data();
+    }
+
+protected:
+    TString Name;
+    size_t Memory, PageSize, Pages;
+    int PagesOrBytes, NumSorters;
+    TSorter* Sorters;
+};
+
+// Array of TDatSorter objects; only forwards its constructor arguments to
+// TSorterArray.
+// NOTE(review): TSieve is passed as the third template argument of TDatSorter,
+// which that template declares as TCompress (TSieve is its fourth parameter).
+// This looks unintended - confirm before relying on a custom sieve here.
+template <typename TVal, template <typename T> class TCompare, typename TSieve = TFakeSieve<TVal>>
+class TDatSorterArray: public TSorterArray<TDatSorter<TVal, TCompare, TSieve>> {
+    using TArrayBase = TSorterArray<TDatSorter<TVal, TCompare, TSieve>>;
+
+public:
+    TDatSorterArray(const char* name, size_t memory, size_t pagesize, size_t pages, int pagesOrBytes = 1)
+        : TArrayBase(name, memory, pagesize, pages, pagesOrBytes)
+    {
+    }
+};
+
+// Sorter that first copies pushed records into an in-object TOutDatFile
+// buffer (Memo) and only touches the temp directory when that buffer is full.
+//
+// State flags:
+//   OpenReq - Open() was called and Memo is open;
+//   Opened  - the underlying TDatSorter was opened on Home (see OpenIfNeeded()).
+// While Opened is false, sorting and iteration use the in-memory "p" methods
+// of the base sorter (SortPortion / Nextp / Currentp / Closep).
+template <typename TVal, template <typename T> class TCompare, typename TCompress = TFakeCompression,
+          typename TSieve = TFakeSieve<TVal>, typename TPageFile = TOutputPageFile, typename TFileTypes = TDefInterFileTypes>
+class TDatSorterMemo: public TDatSorter<TVal, TCompare, TCompress, TSieve, TPageFile, TFileTypes> {
+    typedef TDatSorter<TVal, TCompare, TCompress, TSieve, TPageFile, TFileTypes> TSorter;
+
+public:
+    TOutDatFile<TVal> Memo;
+    TString Home;
+    bool OpenReq;
+    bool Opened;
+    bool UseDirectWrite;
+
+public:
+    // Memo is constructed with pages = `memory` and pagesOrBytes = 0.
+    TDatSorterMemo(const char* name, size_t memory, size_t pagesize, size_t pages, int pagesOrBytes = 1)
+        : TSorter(name, memory, pagesize, pages, pagesOrBytes)
+        , Memo(name, pagesize, memory, 0)
+    {
+        OpenReq = false;
+        Opened = false;
+        UseDirectWrite = false;
+    }
+
+    // Remembers the temp directory and opens Memo. The underlying sorter is
+    // not opened here; that is deferred to OpenIfNeeded().
+    void Open(const TString& home) {
+        OpenReq = true;
+        // TSorter::Open(home);
+        Home = home;
+        Memo.Open(nullptr);
+        Memo.Freeze();
+    }
+
+    // Close() followed by Open(home).
+    void Reopen(const char* home) {
+        Close();
+        Open(home);
+    }
+
+    // Opens Memo without setting a temp directory; does nothing if an open
+    // was already requested.
+    void Open() {
+        if (!OpenReq) {
+            OpenReq = true;
+            Memo.Open(nullptr);
+            Memo.Freeze();
+        }
+    }
+
+    // Opens the underlying sorter on Home the first time it is needed.
+    // Throws yexception if no temp directory was given.
+    void OpenIfNeeded() {
+        if (OpenReq && !Opened) {
+            if (!Home)
+                ythrow yexception() << "Temp directory not specified, call Open(char*) first : " << TSorter::Name;
+            TSorter::Open(Home);
+            Opened = true;
+        }
+    }
+
+    // Reserves `len` bytes for a record in Memo, registers the record with the
+    // sorter and returns it for the caller to fill in.
+    // If Memo is full, the accumulated records are flushed and the
+    // reservation is retried once; throws yexception if it still fails.
+    TVal* Reserve(size_t len) {
+        if (TExtInfoType<TVal>::Exists)
+            return ReserveWithExt(len, 0);
+
+        TVal* u = Memo.Reserve(len);
+        if (!u) {
+            FlushMemoPortion();
+            u = Memo.Reserve(len);
+            if (!u) {
+                // Never hand a null record to the sorter.
+                if (len > Memo.GetPageSize()) {
+                    ythrow yexception() << "Size of element " << len
+                                        << " is larger than page size " << Memo.GetPageSize();
+                }
+                ythrow yexception() << "going to insert a null pointer. Bad.";
+            }
+        }
+        TSorter::PushWithExtInfo(u);
+        return u;
+    }
+
+    // Like Reserve(), but also reserves room for `extSize` bytes of ext info
+    // and writes the encoded ext-info length right after the record body.
+    TVal* ReserveWithExt(size_t len, size_t extSize) {
+        size_t fullLen = len + len_long((i64)extSize) + extSize;
+        TVal* u = Memo.Reserve(fullLen);
+        if (!u) {
+            FlushMemoPortion();
+            u = Memo.Reserve(fullLen);
+            if (!u) {
+                if (fullLen > Memo.GetPageSize()) {
+                    ythrow yexception() << "Size of element " << len << " and size of extInfo " << extSize
+                                        << " is larger than page size " << Memo.GetPageSize();
+                }
+                ythrow yexception() << "going to insert a null pointer. Bad.";
+            }
+        }
+        out_long((i64)extSize, (char*)u + len);
+        TSorter::PushWithExtInfo(u);
+        return u;
+    }
+
+    // Returns the address of the ext-info payload of a record obtained from
+    // ReserveWithExt(len, extSize).
+    char* GetReservedExt(TVal* rec, size_t len, size_t extSize) {
+        return (char*)rec + len + len_long((i64)extSize);
+    }
+
+    // Copies `v` (with optional ext info) into Memo and registers the copy
+    // with the sorter. Flushes and retries once if Memo is full; throws
+    // yexception if the retry fails.
+    const TVal* Push(const TVal* v, const typename TExtInfoType<TVal>::TResult* extInfo = nullptr) {
+        const TVal* u = Memo.Push(v, extInfo);
+        if (!u) {
+            FlushMemoPortion();
+            u = Memo.Push(v, extInfo);
+            if (!u) {
+                if (SizeOf(v) > Memo.GetPageSize()) {
+                    ythrow yexception() << "Size of element " << SizeOf(v)
+                                        << " is larger than page size " << Memo.GetPageSize();
+                }
+                ythrow yexception() << "going to insert a null pointer. Bad.";
+            }
+        }
+        TSorter::PushWithExtInfo(u);
+        return u;
+    }
+
+    // Same as Push() above, with ext info given as raw bytes.
+    const TVal* Push(const TVal* v, const ui8* extInfoRaw, size_t extLen) {
+        const TVal* u = Memo.Push(v, extInfoRaw, extLen);
+        if (!u) {
+            FlushMemoPortion();
+            u = Memo.Push(v, extInfoRaw, extLen);
+            if (!u) {
+                if (SizeOf(v) > Memo.GetPageSize()) {
+                    ythrow yexception() << "Size of element " << SizeOf(v)
+                                        << " is larger than page size " << Memo.GetPageSize();
+                }
+                ythrow yexception() << "going to insert a null pointer. Bad..";
+            }
+        }
+        TSorter::PushWithExtInfo(u);
+        return u;
+    }
+
+    // Same as Push(), for a record that already carries its ext info.
+    const TVal* PushWithExtInfo(const TVal* v) {
+        const TVal* u = Memo.PushWithExtInfo(v);
+        if (!u) {
+            FlushMemoPortion();
+            u = Memo.PushWithExtInfo(v);
+            if (!u) {
+                if (SizeOf(v) > Memo.GetPageSize()) {
+                    ythrow yexception() << "Size of element " << SizeOf(v)
+                                        << " is larger than page size " << Memo.GetPageSize();
+                }
+                ythrow yexception() << "going to insert a null pointer. Bad...";
+            }
+        }
+        TSorter::PushWithExtInfo(u);
+        return u;
+    }
+
+    // If the sorter was opened, writes the remaining records as a last
+    // portion, closes Memo and runs the base sort; otherwise sorts the
+    // records in memory only.
+    void Sort(bool direct = false) {
+        if (Opened) {
+            TSorter::NextPortion(UseDirectWrite);
+            Memo.Close();
+            OpenReq = false;
+            TSorter::Sort(direct);
+        } else {
+            TSorter::SortPortion();
+        }
+    }
+
+    // Next sorted record, taken from the base sorter or from the in-memory
+    // portion depending on Opened.
+    const TVal* Next() {
+        return Opened ? TSorter::Next() : TSorter::Nextp();
+    }
+
+    bool GetExtInfo(typename TExtInfoType<TVal>::TResult* extInfo) const {
+        return NMicroBDB::GetExtInfo(Current(), extInfo);
+    }
+
+    const ui8* GetExtInfoRaw(size_t* len) const {
+        return NMicroBDB::GetExtInfoRaw(Current(), len);
+    }
+
+    // Record most recently returned by Next().
+    const TVal* Current() const {
+        return Opened ? TSorter::Current() : TSorter::Currentp();
+    }
+
+    // Opens the sorter if needed and writes the accumulated records as a portion.
+    int NextPortion() {
+        OpenIfNeeded();
+        return TSorter::NextPortion(UseDirectWrite);
+    }
+
+    // Flushes the remaining records, closes Memo and sorts into the file `name`.
+    void SortToFile(const char* name) {
+        OpenIfNeeded();
+        TSorter::NextPortion(UseDirectWrite);
+        Memo.Close();
+        OpenReq = false;
+        TSorter::SortToFile(name);
+    }
+
+    // Flushes the remaining records, closes Memo and sorts into `output`.
+    void SortToStream(TAutoPtr<IOutputStream> output) {
+        OpenIfNeeded();
+        TSorter::NextPortion(UseDirectWrite);
+        Memo.Close();
+        OpenReq = false;
+        TSorter::SortToStream(output);
+    }
+
+    // Sorts, then copies every record into a TOutDirectFile named `name`
+    // that uses TOutCompress as its compressor.
+    template <typename TKey, typename TOutCompress>
+    void SortToDirectFile(const char* name, size_t ipagesize, size_t ipages) {
+        Sort();
+        TOutDirectFile<TVal, TKey, TOutCompress> out(TSorter::Name, TSorter::PageSize, TSorter::Pages, ipagesize, ipages, TSorter::PagesOrBytes);
+        out.Open(name);
+        while (const TVal* rec = Next())
+            out.PushWithExtInfo(rec);
+        out.Close();
+    }
+
+    // Same as above, using the sorter's own TCompress for the output file.
+    template <typename TKey>
+    void SortToDirectFile(const char* name, size_t ipagesize, size_t ipages) {
+        SortToDirectFile<TKey, TCompress>(name, ipagesize, ipages);
+    }
+
+    // Closes the sorter side only; Memo stays open and is frozen again.
+    void CloseSorter() {
+        if (Opened)
+            TSorter::Close();
+        else
+            TSorter::Closep();
+        Memo.Freeze();
+        Opened = false;
+    }
+
+    // Closes both the sorter side and Memo and resets the state flags.
+    void Close() {
+        if (Opened)
+            TSorter::Close();
+        else
+            TSorter::Closep();
+        Memo.Close();
+        OpenReq = false;
+        Opened = false;
+    }
+
+    int SavePortions(const char* mask) {
+        return TSorter::SavePortions(mask, UseDirectWrite);
+    }
+
+public:
+    using TSorter::RestorePortions;
+
+private:
+    // Shared "Memo is full" step of the reserve/push methods: makes sure the
+    // sorter is open, writes the accumulated records as a portion and calls
+    // Memo.Freeze() before the caller retries.
+    void FlushMemoPortion() {
+        OpenIfNeeded();
+        TSorter::NextPortion(UseDirectWrite);
+        Memo.Freeze();
+    }
+};
+
+// Array of TDatSorterMemo objects; only forwards its constructor arguments
+// to TSorterArray.
+template <typename TVal, template <typename T> class TCompare, typename TCompress = TFakeCompression,
+          typename TSieve = TFakeSieve<TVal>, class TPageFile = TOutputPageFile, class TFileTypes = TDefInterFileTypes>
+class TDatSorterMemoArray: public TSorterArray<TDatSorterMemo<TVal, TCompare, TCompress, TSieve, TPageFile, TFileTypes>> {
+public:
+    using TBase = TSorterArray<TDatSorterMemo<TVal, TCompare, TCompress, TSieve, TPageFile, TFileTypes>>;
+
+    TDatSorterMemoArray(const char* name, size_t memory, size_t pagesize, size_t pages, int pagesOrBytes = 1)
+        : TBase(name, memory, pagesize, pages, pagesOrBytes)
+    {
+    }
+};
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
diff --git a/library/cpp/microbdb/sorter.h b/library/cpp/microbdb/sorter.h
new file mode 100644
index 0000000000..b2e7390377
--- /dev/null
+++ b/library/cpp/microbdb/sorter.h
@@ -0,0 +1,677 @@
+#pragma once
+
+#include <util/ysaveload.h>
+#include <util/generic/algorithm.h>
+#include <contrib/libs/libc_compat/include/link/link.h>
+
+#include "header.h"
+#include "heap.h"
+#include "extinfo.h"
+#include "input.h"
+#include "output.h"
+
+// MBDB_SORT_FUN names the in-memory sort routine used by the sorter:
+// ::StableSort when TEST_MERGE is defined, ::Sort otherwise.
+#ifdef TEST_MERGE
+#define MBDB_SORT_FUN ::StableSort
+#else
+#define MBDB_SORT_FUN ::Sort
+#endif
+
+// Declared here without a definition; specializations follow further down in this file.
+template <class TVal, class TCompare, typename TCompress, typename TSieve, typename TOutPageFile, typename TFileTypes>
+class TDatSorterImpl;
+
+// Sieve policy that never merges records: Sieve() ignores both arguments and
+// always returns 0.
+template <typename TVal>
+struct TFakeSieve {
+    static inline int Sieve(TVal* /* prev */, const TVal* /* next */) noexcept {
+        return 0;
+    }
+};
+
+// Compile-time test for "is this sieve TFakeSieve<...>?".
+// The primary template answers false.
+template <typename TSieve>
+struct TIsSieveFake {
+    static const bool Result = false;
+};
+
+// Specialization for any TFakeSieve instantiation: answers true.
+template <typename TRecord>
+struct TIsSieveFake<TFakeSieve<TRecord>> {
+    static const bool Result = true;
+};
+
+// Default page-file types for the sorter's intermediate (temp) files.
+class TDefInterFileTypes {
+public:
+    using TOutPageFile = TOutputPageFile;
+    using TInPageFile = TInputPageFile;
+};
+
+//class TCompressedInterFileTypes;
+
+// Core of the external sorter.
+//
+// Record pointers are collected in Vector. NextPortion() sorts them and writes
+// them to a temp file whose name is produced from the printf-style template
+// TempBuf and the portion index. Sort() then merges the portion files through
+// the inherited heap iterator, doing intermediate merge passes when there are
+// more portions than can be opened at once.
+//
+// Methods returning int report an error code; 0 means success. The first
+// error is kept in Error and returned by later calls.
+//
+// Portions  - number of portion files created so far;
+// CPortions - index of the first portion file that has not been deleted yet.
+template <class TVal, class TCompare, typename TCompress, typename TSieve, typename TOutPageFile = TOutputPageFile, typename TFileTypes = TDefInterFileTypes>
+class TDatSorterImplBase: protected THeapIter<TVal, TInDatFileImpl<TVal, TInputRecordIterator<TVal, TInputPageIterator<typename TFileTypes::TInPageFile>>>, TCompare> {
+    typedef TOutputRecordIterator<TVal, TOutputPageIterator<typename TFileTypes::TOutPageFile>, TFakeIndexer, TCompress> TTmpRecIter;
+    typedef TInputRecordIterator<TVal, TInputPageIterator<typename TFileTypes::TInPageFile>> TInTmpRecIter;
+
+public:
+    typedef TOutDatFileImpl<TVal, TTmpRecIter> TTmpOut;
+    typedef TInDatFileImpl<TVal, TInTmpRecIter> TTmpIn;
+
+    typedef TOutDatFileImpl<TVal, TOutputRecordIterator<TVal, TOutputPageIterator<TOutPageFile>, TFakeIndexer, TCompress>> TOut;
+    typedef THeapIter<TVal, TTmpIn, TCompare> TMyHeap;
+    typedef TVector<const TVal*> TMyVector;
+    typedef typename TMyVector::iterator TMyIterator;
+
+    // Strategy for sorting one in-memory portion and writing it to `out`.
+    class IPortionSorter {
+    public:
+        virtual ~IPortionSorter() {
+        }
+
+        virtual void Sort(TMyVector&, TTmpOut*) = 0;
+    };
+
+    // Sorts the whole vector in one call, applies the sieve (unless it is the
+    // fake one) and writes the surviving records.
+    class TDefaultSorter: public IPortionSorter {
+    public:
+        void Sort(TMyVector& vector, TTmpOut* out) override {
+            MBDB_SORT_FUN(vector.begin(), vector.end(), TCompare());
+
+            const typename TMyVector::const_iterator
+                end = (TIsSieveFake<TSieve>::Result) ? vector.end() : TDatSorterImplBase::SieveRange(vector.begin(), vector.end());
+
+            for (typename TMyVector::const_iterator it = vector.begin(); it != end; ++it) {
+                out->PushWithExtInfo(*it);
+            }
+        }
+    };
+
+    // Sorts the vector in segments and merges the segments through a heap
+    // while writing.
+    class TSegmentedSorter: public IPortionSorter {
+        // Presents one sorted segment through the Current()/Next() interface
+        // used by THeapIter.
+        // NOTE(review): the constructor steps the iterator one position before
+        // the segment start, so Next() must be called before Current().
+        class TAdaptor {
+            typedef typename TMyVector::const_iterator TConstIterator;
+
+        public:
+            TAdaptor(TConstIterator b, TConstIterator e)
+                : Curr_(b)
+                , End_(e)
+            {
+                --Curr_;
+            }
+
+            inline const TVal* Current() const {
+                return *Curr_;
+            }
+
+            inline const TVal* Next() {
+                ++Curr_;
+
+                if (Curr_ == End_) {
+                    return nullptr;
+                }
+
+                return *Curr_;
+            }
+
+        private:
+            TConstIterator Curr_;
+            TConstIterator End_;
+        };
+
+        typedef THeapIter<TVal, TAdaptor, TCompare> TPortionsHeap;
+
+    public:
+        void Sort(TMyVector& vector, TTmpOut* out) override {
+            TVector<TAdaptor> bounds;
+            typename TMyVector::iterator
+                it = vector.begin();
+            // Number of segments: vector.size() * sizeof(TVal) divided by 4 MiB, at least one.
+            const size_t portions = Max<size_t>(1, (vector.size() * sizeof(TVal)) / (4 << 20));
+            // A zero step would never advance `it` below, so keep it at least 1.
+            const size_t step = Max<size_t>(1, vector.size() / portions);
+
+            // Sort segments
+            while (it != vector.end()) {
+                const typename TMyVector::iterator
+                    end = Min(it + step, vector.end());
+
+                MBDB_SORT_FUN(it, end, TCompare());
+
+                bounds.push_back(TAdaptor(it, end));
+
+                it = end;
+            }
+
+            //
+            // Merge result
+            //
+
+            TPortionsHeap heap(bounds);
+
+            if (TIsSieveFake<TSieve>::Result) {
+                while (const TVal* val = heap.Next()) {
+                    out->PushWithExtInfo(val);
+                }
+            } else {
+                // `prev` is the last record written to `out`; the sieve may fold
+                // the next record into it, in which case nothing new is written.
+                const TVal* val = heap.Next();
+                const TVal* prev = out->PushWithExtInfo(val);
+
+                for (val = heap.Next(); val && prev; val = heap.Next()) {
+                    if (TSieve::Sieve((TVal*)prev, val)) {
+                        continue;
+                    }
+
+                    prev = out->PushWithExtInfo(val);
+                }
+
+                // Final call with the last record as both arguments, mirroring SieveRange().
+                if (prev) {
+                    TSieve::Sieve((TVal*)prev, prev);
+                }
+            }
+        }
+    };
+
+public:
+    TDatSorterImplBase()
+        : Sorter(new TDefaultSorter)
+    {
+        InFiles = nullptr;
+        TempBuf = nullptr;
+        Ptr = Vector.end();
+        Cur = nullptr;
+        Portions = CPortions = Error = 0;
+    }
+
+    ~TDatSorterImplBase() {
+        Close();
+    }
+
+    // Stores the temp-file name template and the page configuration.
+    // When `pagesOrBytes` is 0, `pages` is divided by `pagesize` to get the
+    // page count; the page count is at least 1.
+    // Returns 0, or MBDB_NO_MEMORY if the template cannot be duplicated.
+    int Open(const char* templ, size_t pagesize, size_t pages, int pagesOrBytes = 1) {
+        Portions = CPortions = Error = 0;
+        // Duplicate before freeing so that a repeated Open() neither leaks the
+        // old template nor breaks when `templ` aliases it.
+        char* fresh = strdup(templ);
+        free(TempBuf);
+        TempBuf = fresh;
+        if (!TempBuf)
+            return Error = MBDB_NO_MEMORY;
+        Pagesize = pagesize;
+        if (pagesOrBytes)
+            Pages = pages;
+        else
+            Pages = pages / pagesize;
+        Pages = Max(1, Pages);
+        return 0;
+    }
+
+    void Push(const TVal* v) {
+        // Serialized extInfo must follow a record being pushed, therefore, to avoid
+        // unintentional misusage (as if when you are adding TExtInfo in your record
+        // type: you may forget to check your sorting routines and get a segfault as
+        // a result).
+        // PushWithExtInfo(v) should be called on records with extInfo.
+        static_assert(!TExtInfoType<TVal>::Exists, "expect !TExtInfoType<TVal>::Exists");
+
+        Vector.push_back(v);
+    }
+
+    // Registers a record pointer; the record itself is not copied.
+    void PushWithExtInfo(const TVal* v) {
+        Vector.push_back(v);
+    }
+
+    // Sorts (and sieves) the records in memory without writing a file and
+    // positions the Nextp() cursor at the first record.
+    int SortPortion() {
+        Ptr = Vector.end();
+        Cur = nullptr;
+        if (!Vector.size() || Error)
+            return Error;
+
+        MBDB_SORT_FUN(Vector.begin(), Vector.end(), TCompare());
+
+        if (!TIsSieveFake<TSieve>::Result) {
+            const typename TMyVector::iterator
+                end = SieveRange(Vector.begin(), Vector.end());
+
+            Vector.resize(end - Vector.begin());
+        }
+
+        Ptr = Vector.begin();
+        Cur = nullptr;
+        return 0;
+    }
+
+    // Next record of the in-memory portion, or nullptr at the end.
+    const TVal* Nextp() {
+        Cur = Ptr == Vector.end() ? nullptr : *Ptr++;
+        return Cur;
+    }
+
+    // Record most recently returned by Nextp().
+    const TVal* Currentp() const {
+        return Cur;
+    }
+
+    // Drops the in-memory portion.
+    void Closep() {
+        Vector.clear();
+        Ptr = Vector.end();
+        Cur = nullptr;
+    }
+
+    // Sorts the collected records with the current portion sorter, writes them
+    // to a new portion file and empties Vector. The file is unlinked if an
+    // error is recorded.
+    int NextPortion(bool direct = false) {
+        if (!Vector.size() || Error)
+            return Error;
+
+        TTmpOut out;
+        int ret, ret1;
+        char fname[FILENAME_MAX];
+
+        snprintf(fname, sizeof(fname), TempBuf, Portions++);
+        if ((ret = out.Open(fname, Pagesize, Pages, 1, direct)))
+            return Error = ret;
+
+        Sorter->Sort(Vector, &out);
+
+        Vector.erase(Vector.begin(), Vector.end());
+        ret = out.GetError();
+        ret1 = out.Close();
+        Error = Error ? Error : ret ? ret : ret1;
+        if (Error)
+            unlink(fname);
+        return Error;
+    }
+
+    // Renames every portion file to the name made from `mask` and the portion
+    // index (as a string), then saves the portion count into the file made
+    // from `mask` and "count".
+    int SavePortions(const char* mask, bool direct = false) {
+        char srcname[PATH_MAX], dstname[PATH_MAX];
+        if (Vector.size())
+            NextPortion(direct);
+        for (int i = 0; i < Portions; i++) {
+            // 16 bytes hold any int in decimal, including sign and terminator.
+            char num[16];
+            snprintf(num, sizeof(num), "%i", i);
+            snprintf(srcname, sizeof(srcname), TempBuf, i);
+            snprintf(dstname, sizeof(dstname), mask, num);
+            int res = rename(srcname, dstname);
+            if (res)
+                return res;
+        }
+        snprintf(dstname, sizeof(dstname), mask, "count");
+        TOFStream fcount(dstname);
+        Save(&fcount, Portions);
+        fcount.Finish();
+        return 0;
+    }
+
+    // Inverse of SavePortions(): loads the portion count and hard-links every
+    // saved portion back under its temp name.
+    int RestorePortions(const char* mask) {
+        char srcname[PATH_MAX], dstname[PATH_MAX];
+        snprintf(srcname, sizeof(srcname), mask, "count");
+        TIFStream fcount(srcname);
+        Load(&fcount, Portions);
+        for (int i = 0; i < Portions; i++) {
+            char num[16];
+            snprintf(num, sizeof(num), "%i", i);
+            snprintf(dstname, sizeof(dstname), TempBuf, i);
+            snprintf(srcname, sizeof(srcname), mask, num);
+            unlink(dstname);
+            int res = link(srcname, dstname);
+            if (res)
+                return res;
+        }
+        return 0;
+    }
+
+    // Restores portions from `count` saved sets. Here `mask` takes two
+    // arguments: the set index and the portion index (or "count"). The
+    // restored portions are numbered consecutively across the sets, and their
+    // counts are added to Portions.
+    int RestorePortions(const char* mask, ui32 count) {
+        char srcname[PATH_MAX], dstname[PATH_MAX];
+        ui32 portions;
+        TVector<ui32> counts;
+        for (ui32 j = 0; j < count; j++) {
+            snprintf(srcname, sizeof(srcname), mask, j, "count");
+            TIFStream fcount(srcname);
+            Load(&fcount, portions);
+            counts.push_back(portions);
+            Portions += portions;
+        }
+        ui32 p = 0;
+        for (ui32 j = 0; j < count; j++) {
+            int cnt = counts[j];
+            for (int i = 0; i < cnt; i++, p++) {
+                char num[16];
+                snprintf(num, sizeof(num), "%i", i);
+                snprintf(dstname, sizeof(dstname), TempBuf, p);
+                snprintf(srcname, sizeof(srcname), mask, j, num);
+                unlink(dstname);
+                int res = link(srcname, dstname);
+                if (res) {
+                    fprintf(stderr, "Can not link %s to %s\n", srcname, dstname);
+                    return res;
+                }
+            }
+        }
+        return 0;
+    }
+
+    // Prepares the merged, sorted stream that the inherited heap then serves.
+    //
+    // Remaining in-memory records are written as a portion first. Portions are
+    // then opened in groups: every group except the last is merged into a new
+    // portion file (appended at the end) and its inputs are closed and
+    // unlinked; the last group is left open in the heap for reading.
+    int Sort(size_t memory, int maxportions = 1000, bool direct = false) {
+        int ret, end, beg, i;
+        char fname[FILENAME_MAX];
+
+        if (Vector.size())
+            NextPortion();
+
+        if (Error)
+            return Error;
+        if (!Portions) {
+            TMyHeap::Init(&DummyFile, 1); // closed file
+            HPages = 1;
+            return 0;
+        }
+
+        Optimize(memory, maxportions);
+        if (!(InFiles = new TTmpIn[MPortions]))
+            return MBDB_NO_MEMORY;
+
+        for (beg = 0; beg < Portions && !Error; beg = end) {
+            end = (int)Min(beg + FPortions, Portions);
+            for (i = beg; i < end && !Error; i++) {
+                snprintf(fname, sizeof(fname), TempBuf, i);
+                if ((ret = InFiles[i - beg].Open(fname, HPages, 1, nullptr, direct)))
+                    Error = Error ? Error : ret;
+            }
+            if (Error)
+                return Error;
+            TMyHeap::Init(InFiles, end - beg);
+            if (end != Portions) {
+                // Intermediate pass: merge this group into a new portion file.
+                TTmpOut out;
+                const TVal* v;
+                snprintf(fname, sizeof(fname), TempBuf, Portions++);
+                if ((ret = out.Open(fname, Pagesize, HPages)))
+                    return Error = Error ? Error : ret;
+                while ((v = TMyHeap::Next()))
+                    out.PushWithExtInfo(v);
+                ret = out.GetError();
+                Error = Error ? Error : ret;
+                ret = out.Close();
+                Error = Error ? Error : ret;
+                for (i = beg; i < end; i++) {
+                    ret = InFiles[i - beg].Close();
+                    Error = Error ? Error : ret;
+                    snprintf(fname, sizeof(fname), TempBuf, CPortions++);
+                    unlink(fname);
+                }
+            }
+            // Only the first group uses FPortions; later groups use MPortions.
+            FPortions = MPortions;
+        }
+        return Error;
+    }
+
+    // Releases the input files and the in-memory portion, unlinks every
+    // remaining portion file and frees the name template.
+    // Returns the recorded error code.
+    int Close() {
+        char fname[FILENAME_MAX];
+        delete[] InFiles;
+        InFiles = nullptr;
+        Closep();
+        for (int i = CPortions; i < Portions; i++) {
+            snprintf(fname, sizeof(fname), TempBuf, i);
+            unlink(fname);
+        }
+        CPortions = Portions = 0;
+        free(TempBuf);
+        TempBuf = nullptr;
+        return Error;
+    }
+
+    // Switches portion sorting from TDefaultSorter to TSegmentedSorter.
+    void UseSegmentSorter() {
+        Sorter.Reset(new TSegmentedSorter);
+    }
+
+    inline int GetError() const {
+        return Error;
+    }
+
+    inline int GetPages() const {
+        return Pages;
+    }
+
+    inline int GetPageSize() const {
+        return Pagesize;
+    }
+
+private:
+    // Compacts the sorted range [begin, end) in place: a record for which
+    // TSieve::Sieve(kept, record) returns non-zero is dropped, any other
+    // record becomes the new kept one. Sieve is finally called once with the
+    // last kept record as both arguments.
+    // Returns the end of the compacted range.
+    static TMyIterator SieveRange(const TMyIterator begin, const TMyIterator end) {
+        // Nothing to sieve; also avoids stepping past `end` below.
+        if (begin == end) {
+            return end;
+        }
+
+        TMyIterator it = begin;
+        TMyIterator prev = begin;
+
+        for (++it; it != end; ++it) {
+            if (TSieve::Sieve((TVal*)*prev, *it)) {
+                continue;
+            }
+
+            ++prev;
+
+            if (it != prev) {
+                *prev = *it;
+            }
+        }
+
+        TSieve::Sieve((TVal*)*prev, *prev);
+
+        return ++prev;
+    }
+
+protected:
+    // Chooses the merge fan-in for Sort():
+    //   MPortions - maximum number of portion files opened at once;
+    //   FPortions - size of the first merge group;
+    //   HPages    - pages given to each opened file.
+    // `fbufmax` caps the per-file buffer (as a page count) used for HPages.
+    void Optimize(size_t memory, int maxportions, size_t fbufmax = 256u << 20) {
+        maxportions = (int)Min((size_t)maxportions, memory / Pagesize) - 1;
+        size_t maxpages = Max((size_t)1u, fbufmax / Pagesize);
+
+        if (maxportions <= 2) {
+            FPortions = MPortions = 2;
+            HPages = 1;
+            return;
+        }
+        if (maxportions >= Portions) {
+            // Everything fits into a single merge pass.
+            FPortions = MPortions = Portions;
+            HPages = (int)Min(memory / ((Portions + 1) * Pagesize), maxpages);
+            return;
+        }
+        if (((Portions + maxportions - 1) / maxportions) <= maxportions) {
+            // Shrink the fan-in while the number of merge groups still does not exceed it.
+            while (((Portions + maxportions - 1) / maxportions) <= maxportions)
+                --maxportions;
+            MPortions = ++maxportions;
+            int total = ((Portions + maxportions - 1) / maxportions) + Portions;
+            FPortions = (total % maxportions) ? (total % maxportions) : MPortions;
+            HPages = (int)Min(memory / ((MPortions + 1) * Pagesize), maxpages);
+            return;
+        }
+        FPortions = MPortions = maxportions;
+        HPages = (int)Min(memory / ((MPortions + 1) * Pagesize), maxpages);
+    }
+
+    TMyVector Vector;
+    typename TMyVector::iterator Ptr;
+    const TVal* Cur;
+    TTmpIn *InFiles, DummyFile;
+    char* TempBuf;
+    int Portions, CPortions, Pagesize, Pages, Error;
+    int FPortions, MPortions, HPages;
+    THolder<IPortionSorter> Sorter;
+};
+
+// Sorter variant for TFakeSieve with the default output page file and file types:
+// records are forwarded straight from the base heap without any sieving.
+template <class TVal, class TCompare, typename TCompress>
+class TDatSorterImpl<TVal, TCompare, TCompress, TFakeSieve<TVal>, TOutputPageFile, TDefInterFileTypes>
+    : public TDatSorterImplBase<TVal, TCompare, TCompress, TFakeSieve<TVal>, TOutputPageFile, TDefInterFileTypes> {
+    typedef TDatSorterImplBase<TVal, TCompare, TCompress, TFakeSieve<TVal>, TOutputPageFile, TDefInterFileTypes> TBase;
+
+    // Push every sorted record into an already opened output, then close the
+    // output and the sorter. Returns the first non-zero error code, 0 on success.
+    int Drain(typename TBase::TOut& out) {
+        for (const TVal* rec = Next(); rec; rec = Next()) {
+            out.PushWithExtInfo(rec);
+        }
+        int ret = out.GetError();
+        if (ret) {
+            return ret;
+        }
+        ret = out.Close();
+        if (ret) {
+            return ret;
+        }
+        return TBase::Close();
+    }
+
+public:
+    // Sort the collected portions and write the result to the file `name`.
+    int SortToFile(const char* name, size_t memory, int maxportions = 1000) {
+        const int sortError = TBase::Sort(memory, maxportions);
+        if (sortError) {
+            return sortError;
+        }
+        typename TBase::TOut out;
+        const int openError = out.Open(name, TBase::Pagesize, TBase::HPages);
+        if (openError) {
+            return openError;
+        }
+        return Drain(out);
+    }
+
+    // Sort the collected portions and write the result to `output`.
+    int SortToStream(TAutoPtr<IOutputStream> output, size_t memory, int maxportions = 1000) {
+        const int sortError = TBase::Sort(memory, maxportions);
+        if (sortError) {
+            return sortError;
+        }
+        typename TBase::TOut out;
+        const int openError = out.Open(output, TBase::Pagesize, TBase::HPages);
+        if (openError) {
+            return openError;
+        }
+        return Drain(out);
+    }
+
+    // The accessors below forward to the merge heap of the base class.
+    const TVal* Next() {
+        return TBase::TMyHeap::Next();
+    }
+
+    const TVal* Current() const {
+        return TBase::TMyHeap::Current();
+    }
+
+    bool GetExtInfo(typename TExtInfoType<TVal>::TResult* extInfo) const {
+        return TBase::TMyHeap::GetExtInfo(extInfo);
+    }
+
+    const ui8* GetExtInfoRaw(size_t* len) const {
+        return TBase::TMyHeap::GetExtInfoRaw(len);
+    }
+};
+
+// Sorter that runs the merged stream through TSieve: the record being accumulated
+// lives in Cur, the record most recently handed to the caller lives in Prev.
+// Both buffers are malloc'ed and owned by this object.
+template <class TVal, class TCompare, typename TCompress, typename TSieve,
+          typename TOutPageFile = TOutputPageFile, typename TFileTypes = TDefInterFileTypes>
+class TDatSorterImpl: public TDatSorterImplBase<TVal, TCompare, TCompress, TSieve, TOutPageFile, TFileTypes> {
+    typedef TDatSorterImplBase<TVal, TCompare, TCompress, TSieve, TOutPageFile, TFileTypes> TBase;
+
+public:
+    TDatSorterImpl()
+        : Cur(nullptr)
+        , Prev(nullptr)
+    {
+    }
+
+    // Release the record buffers even when the caller never reached Close();
+    // previously they leaked in that case. free(nullptr) is a no-op.
+    ~TDatSorterImpl() {
+        free(Prev);
+        free(Cur);
+    }
+
+    // Sort the collected portions and write the sieved result to the file `name`.
+    // Returns the first non-zero error code, 0 on success.
+    int SortToFile(const char* name, size_t memory, int maxportions = 1000) {
+        int ret = Sort(memory, maxportions);
+        if (ret)
+            return ret;
+        typename TBase::TOut out;
+        if ((ret = out.Open(name, TBase::Pagesize, TBase::HPages)))
+            return ret;
+        const TVal* rec;
+        while ((rec = Next()))
+            out.PushWithExtInfo(rec);
+        if ((ret = out.GetError()))
+            return ret;
+        if ((ret = out.Close()))
+            return ret;
+        if ((ret = TBase::Close()))
+            return ret;
+        return 0;
+    }
+
+    // Same as SortToFile(), but the result goes to `output`.
+    int SortToStream(TAutoPtr<IOutputStream> output, size_t memory, int maxportions = 1000) {
+        int ret = Sort(memory, maxportions);
+        if (ret)
+            return ret;
+        typename TBase::TOut out;
+        if ((ret = out.Open(output, TBase::Pagesize, TBase::HPages)))
+            return ret;
+        const TVal* rec;
+        while ((rec = Next()))
+            out.PushWithExtInfo(rec);
+        if ((ret = out.GetError()))
+            return ret;
+        if ((ret = out.Close()))
+            return ret;
+        if ((ret = TBase::Close()))
+            return ret;
+        return 0;
+    }
+
+    // Open the base sorter and reset the sieve state. Buffers left over from a
+    // previous use are freed instead of being overwritten (which leaked them).
+    int Open(const char* templ, size_t pagesize, size_t pages, int pagesOrBytes = 1) {
+        int res = TBase::Open(templ, pagesize, pages, pagesOrBytes);
+        free(Prev);
+        Prev = nullptr;
+        free(Cur);
+        Cur = nullptr;
+        return res;
+    }
+
+    // Run the base sort and, on success, prime Cur with the first merged record.
+    int Sort(size_t memory, int maxportions = 1000, bool direct = false) {
+        int res = TBase::Sort(memory, maxportions, direct);
+        if (!res) {
+            // Drop a record left from an earlier Sort() so it is neither leaked
+            // nor mistaken for data of this run.
+            free(Cur);
+            Cur = nullptr;
+            const TVal* rec = TBase::TMyHeap::Next();
+            if (rec) {
+                Cur = AllocRecord();
+                CopyRecord(Cur, rec);
+            }
+        }
+        return res;
+    }
+
+    // Prev = last returned
+    // Cur = current accumulating with TSieve
+
+    // Return the next sieved record, or nullptr once the stream is exhausted.
+    // The returned pointer refers to Prev and is overwritten by the next call.
+    const TVal* Next() {
+        if (!Cur) {
+            // Stream exhausted: the last returned record is no longer needed.
+            free(Prev);
+            Prev = nullptr;
+            return nullptr;
+        }
+        const TVal* rec;
+
+        if (TIsSieveFake<TSieve>::Result)
+            rec = TBase::TMyHeap::Next();
+        else {
+            // Keep pulling records for as long as Sieve() reports that it folded
+            // them into Cur.
+            do {
+                rec = TBase::TMyHeap::Next();
+            } while (rec && TSieve::Sieve((TVal*)Cur, rec));
+        }
+
+        if (!Prev)
+            Prev = AllocRecord();
+        CopyRecord(Prev, Cur);
+
+        if (rec) {
+            CopyRecord(Cur, rec);
+        } else {
+            // No successor: sieve the record against itself and end the stream.
+            TSieve::Sieve((TVal*)Cur, Cur);
+            free(Cur);
+            Cur = nullptr;
+        }
+        return Prev;
+    }
+
+    // The record returned by the latest Next() call (nullptr before the first call
+    // and after the stream ended).
+    const TVal* Current() const {
+        return Prev;
+    }
+
+    // Close the base sorter and release both record buffers.
+    int Close() {
+        int res = TBase::Close();
+        free(Prev);
+        Prev = nullptr;
+        free(Cur);
+        Cur = nullptr;
+        return res;
+    }
+
+private:
+    // Allocate one record buffer: sizeof(TVal) bytes for records without ext info,
+    // a whole page otherwise.
+    // NOTE(review): the malloc result is not checked, as in the original code.
+    TVal* AllocRecord() const {
+        if (!TExtInfoType<TVal>::Exists)
+            return (TVal*)malloc(sizeof(TVal));
+        return (TVal*)malloc(TBase::Pagesize);
+    }
+
+    // Copy `src` with the two extra lengths reported by SizeOfExt() into `dst`.
+    static void CopyRecord(TVal* dst, const TVal* src) {
+        size_t els, es;
+        size_t sz = NMicroBDB::SizeOfExt(src, &els, &es);
+        sz += els + es;
+        memcpy(dst, src, sz);
+    }
+
+protected:
+    TVal* Cur;
+    TVal* Prev;
+};
diff --git a/library/cpp/microbdb/sorterdef.h b/library/cpp/microbdb/sorterdef.h
new file mode 100644
index 0000000000..8834b5fff8
--- /dev/null
+++ b/library/cpp/microbdb/sorterdef.h
@@ -0,0 +1,19 @@
+#pragma once
+
+// MAKESORTERTMPL(TRecord, MemberFunc) declares a class template named MemberFunc
+// and specializes it for TRecord as a comparator built on the static
+// TRecord::MemberFunc(l, r):
+//   - operator()(l, r)      returns true when TRecord::MemberFunc(l, r) < 0;
+//   - operator()(l, r, int) returns the raw TRecord::MemberFunc(l, r) result
+//     (the int argument only selects this overload).
+#define MAKESORTERTMPL(TRecord, MemberFunc) \
+ template <typename T> \
+ struct MemberFunc; \
+ template <> \
+ struct MemberFunc<TRecord> { \
+ bool operator()(const TRecord* l, const TRecord* r) { \
+ return TRecord ::MemberFunc(l, r) < 0; \
+ } \
+ int operator()(const TRecord* l, const TRecord* r, int) { \
+ return TRecord ::MemberFunc(l, r); \
+ } \
+ }
+
+// Three-way comparison built on operator< and operator>:
+// returns -1 when a < b, 1 when a > b and 0 otherwise.
+template <typename T>
+static inline int compare(const T& a, const T& b) {
+    if (a < b) {
+        return -1;
+    }
+    const bool greater = (a > b);
+    return greater;
+}
diff --git a/library/cpp/microbdb/utility.h b/library/cpp/microbdb/utility.h
new file mode 100644
index 0000000000..5c86061bca
--- /dev/null
+++ b/library/cpp/microbdb/utility.h
@@ -0,0 +1,75 @@
+#pragma once
+
+#include "microbdb.h"
+
+// Sort the records of `ifile` with TCompare<TRecord> and write them to `ofile`.
+//   meta   - meta page passed to the input reader
+//   memory - memory budget in bytes; raised to at least 3 MiB
+//   tmpDir - passed to MakeSorterTempl() to place the temporary sorter files
+// Every failure is reported with err(1, ...), which terminates the process, so the
+// function only ever returns 0.
+template <class TRecord, template <class T> class TCompare>
+int SortData(const TFile& ifile, const TFile& ofile, const TDatMetaPage* meta, size_t memory, const char* tmpDir = nullptr) {
+    char templ[FILENAME_MAX];
+    TInDatFileImpl<TRecord> datin;
+    TOutDatFileImpl<TRecord> datout;
+    TDatSorterImpl<TRecord, TCompare<TRecord>, TFakeCompression, TFakeSieve<TRecord>> sorter;
+    const TRecord* u;
+    int ret;
+
+    // Remove the temporary directory: templ is cut at its last path separator and
+    // the remaining path is deleted with its contents. Nothing is done if templ
+    // has no separator (the unchecked strrchr() result used to be dereferenced).
+    auto removeTempDir = [&templ]() {
+        if (char* sep = strrchr(templ, LOCSLASH_C)) {
+            *sep = 0;
+            RemoveDirWithContents(templ);
+        }
+    };
+
+    const size_t minMemory = (2u << 20);
+    memory = Max(memory, minMemory + minMemory / 2);
+    if (datin.Open(ifile, meta, memory - minMemory, 0))
+        err(1, "can't read input file");
+
+    // Reserve at least two output pages and take them out of the sort budget.
+    size_t outpages = Max((size_t)2u, minMemory / datin.GetPageSize());
+    memory -= outpages * datin.GetPageSize();
+
+    if ((ret = MakeSorterTempl(templ, tmpDir)))
+        err(1, "can't create tempdir in \"%s\"; error: %d\n", templ, ret);
+
+    if (sorter.Open(templ, datin.GetPageSize(), outpages)) {
+        removeTempDir();
+        err(1, "can't open sorter");
+    }
+
+    // Feed the input to the sorter one frozen chunk at a time; each chunk becomes
+    // one sorter portion.
+    while (1) {
+        datin.Freeze();
+        while ((u = datin.Next()))
+            sorter.PushWithExtInfo(u);
+        sorter.NextPortion();
+        if (datin.GetError() || datin.IsEof())
+            break;
+    }
+
+    if (datin.GetError()) {
+        removeTempDir();
+        err(1, "in data file error %d", datin.GetError());
+    }
+    if (datin.Close()) {
+        removeTempDir();
+        err(1, "can't close in data file");
+    }
+
+    // The result of Sort() is not inspected here; a failure is picked up through
+    // sorter.GetError() after the output loop.
+    sorter.Sort(memory);
+
+    // NOTE(review): datin.GetPageSize() is read after datin.Close() - confirm that
+    // the page size survives Close().
+    if (datout.Open(ofile, datin.GetPageSize(), outpages)) {
+        removeTempDir();
+        err(1, "can't write out file");
+    }
+
+    while ((u = sorter.Next()))
+        datout.PushWithExtInfo(u);
+
+    // Close the sorter and remove the temporary directory before reporting a sorter
+    // failure, so that the directory is not left behind when err() exits.
+    const int sorterError = sorter.GetError();
+    const int sorterCloseError = sorter.Close();
+    removeTempDir();
+    if (sorterError)
+        err(1, "sorter error %d", sorterError);
+    if (sorterCloseError)
+        err(1, "can't close sorter");
+
+    if (datout.GetError())
+        err(1, "out data file error %d", datout.GetError());
+    if (datout.Close())
+        err(1, "can't close out data file");
+    return 0;
+}
diff --git a/library/cpp/microbdb/wrappers.h b/library/cpp/microbdb/wrappers.h
new file mode 100644
index 0000000000..38eb8edebc
--- /dev/null
+++ b/library/cpp/microbdb/wrappers.h
@@ -0,0 +1,637 @@
+#pragma once
+
+#include "microbdb.h"
+
+// MAKEFILTERTMPL(TRecord, MemberFunc, NS) declares a class template named
+// MemberFunc and specializes it for TRecord as a unary predicate whose
+// operator()(r) returns NS::MemberFunc(r).
+#define MAKEFILTERTMPL(TRecord, MemberFunc, NS) \
+ template <typename T> \
+ struct MemberFunc; \
+ template <> \
+ struct MemberFunc<TRecord> { \
+ bool operator()(const TRecord* r) { \
+ return NS::MemberFunc(r); \
+ } \
+ }
+
+// MAKEJOINTMPL(TRecordA, TRecordB, MemberFunc, NS, TMergeType) declares a
+// two-parameter class template named MemberFunc, specializes it for
+// <TRecordA, TRecordB> with operator()(l, r) returning NS::MemberFunc(l, r),
+// and typedefs TMergeRec<TRecordA, TRecordB> as TMergeType.
+#define MAKEJOINTMPL(TRecordA, TRecordB, MemberFunc, NS, TMergeType) \
+ template <typename A, typename B> \
+ struct MemberFunc; \
+ template <> \
+ struct MemberFunc<TRecordA, TRecordB> { \
+ int operator()(const TRecordA* l, const TRecordB* r) { \
+ return NS::MemberFunc(l, r); \
+ } \
+ }; \
+ typedef TMergeRec<TRecordA, TRecordB> TMergeType
+
+// MAKEJOINTMPL2 is the variant of MAKEJOINTMPL in which the generated struct has
+// its own name (StructName) and calls MemberFunc(l, r) unqualified; it also
+// typedefs TMergeRec<TRecordA, TRecordB> as TMergeType.
+#define MAKEJOINTMPL2(TRecordA, TRecordB, MemberFunc, StructName, TMergeType) \
+ template <typename A, typename B> \
+ struct StructName; \
+ template <> \
+ struct StructName<TRecordA, TRecordB> { \
+ int operator()(const TRecordA* l, const TRecordB* r) { \
+ return MemberFunc(l, r); \
+ } \
+ }; \
+ typedef TMergeRec<TRecordA, TRecordB> TMergeType
+
+// MAKEJOINTMPLLEFT is the variant of MAKEJOINTMPL whose left argument is
+// dereferenced through its RecA member: operator()(l, r) returns
+// NS::MemberFunc(l->RecA, r), so TRecordA must expose a RecA member.
+// It also typedefs TMergeRec<TRecordA, TRecordB> as TMergeType.
+#define MAKEJOINTMPLLEFT(TRecordA, TRecordB, MemberFunc, NS, TMergeType) \
+ template <typename A, typename B> \
+ struct MemberFunc; \
+ template <> \
+ struct MemberFunc<TRecordA, TRecordB> { \
+ int operator()(const TRecordA* l, const TRecordB* r) { \
+ return NS::MemberFunc(l->RecA, r); \
+ } \
+ }; \
+ typedef TMergeRec<TRecordA, TRecordB> TMergeType
+
+// Pull-style record source: Next() hands out one record per call.
+template <class TRec>
+class IDatNextSource {
+public:
+    // Polymorphic base: the virtual destructor makes deletion through a base
+    // pointer well defined.
+    virtual ~IDatNextSource() = default;
+
+    // Returns the next record; the implementations in this file return 0 at the end.
+    virtual const TRec* Next() = 0;
+
+    // Hook invoked through the Work() chain; does nothing by default.
+    virtual void Work() {
+    }
+};
+
+// Consumer side of a pull chain: keeps a reference to the source it reads from.
+// The source must outlive the receiver.
+template <class TRec>
+class IDatNextReceiver {
+public:
+    IDatNextReceiver(IDatNextSource<TRec>& source)
+        : Source(source)
+    {
+    }
+
+    // Polymorphic base: the virtual destructor makes deletion through a base
+    // pointer well defined.
+    virtual ~IDatNextReceiver() = default;
+
+    // Forwards Work() to the source.
+    virtual void Work() {
+        Source.Work();
+    }
+
+protected:
+    IDatNextSource<TRec>& Source;
+};
+
+// Pull-chain stage: reads TInRec records from its source and is itself a source
+// of TOutRec records.
+template <class TInRec, class TOutRec>
+class IDatNextChannel: public IDatNextReceiver<TInRec>, public IDatNextSource<TOutRec> {
+    typedef IDatNextReceiver<TInRec> TUpstream;
+
+public:
+    IDatNextChannel(IDatNextSource<TInRec>& source)
+        : TUpstream(source)
+    {
+    }
+
+    // Overrides Work() of both bases and delegates to the receiver side.
+    void Work() override {
+        TUpstream::Work();
+    }
+};
+
+// Minimal interface for an object that exposes a single Work() step.
+class IDatWorker {
+public:
+    // Polymorphic base: the virtual destructor makes deletion through a base
+    // pointer well defined.
+    virtual ~IDatWorker() = default;
+
+    virtual void Work() = 0;
+};
+
+// Push-style record sink: records are delivered one at a time through Push().
+template <class TRec>
+class IDatPushReceiver {
+public:
+    // Polymorphic base: the virtual destructor makes deletion through a base
+    // pointer well defined.
+    virtual ~IDatPushReceiver() = default;
+
+    // Accepts one record.
+    virtual void Push(const TRec* rec) = 0;
+
+    // Invoked through the Work() chain of the producers.
+    virtual void Work() = 0;
+};
+
+// Producer side of a push chain: keeps a reference to the receiver it feeds.
+// The receiver must outlive the source.
+template <class TRec>
+class IDatPushSource {
+public:
+    IDatPushSource(IDatPushReceiver<TRec>& receiver)
+        : Receiver(receiver)
+    {
+    }
+
+    // Polymorphic base: the virtual destructor makes deletion through a base
+    // pointer well defined.
+    virtual ~IDatPushSource() = default;
+
+    // Forwards Work() to the receiver.
+    virtual void Work() {
+        Receiver.Work();
+    }
+
+protected:
+    IDatPushReceiver<TRec>& Receiver;
+};
+
+// Push-chain stage: accepts TInRec records and feeds TOutRec records to its
+// receiver.
+template <class TInRec, class TOutRec>
+class IDatPushChannel: public IDatPushReceiver<TInRec>, public IDatPushSource<TOutRec> {
+    typedef IDatPushSource<TOutRec> TDownstream;
+
+public:
+    IDatPushChannel(IDatPushReceiver<TOutRec>& receiver)
+        : TDownstream(receiver)
+    {
+    }
+
+    // Overrides Work() of both bases and delegates to the source side.
+    void Work() override {
+        TDownstream::Work();
+    }
+};
+
+// Adapter between a pull source and a push receiver: Work() drains the source
+// into the receiver.
+template <class TRec>
+class IDatNextToPush: public IDatNextReceiver<TRec>, public IDatPushSource<TRec> {
+    typedef IDatNextReceiver<TRec> TNextReceiver;
+    typedef IDatPushSource<TRec> TPushSource;
+
+public:
+    IDatNextToPush(IDatNextSource<TRec>& source, IDatPushReceiver<TRec>& receiver)
+        : TNextReceiver(source)
+        , TPushSource(receiver)
+    {
+    }
+
+    // Pushes every record of the source to the receiver, then runs Work() on the
+    // receiver side first and on the source side second.
+    virtual void Work() {
+        for (const TRec* rec = TNextReceiver::Source.Next(); rec; rec = TNextReceiver::Source.Next()) {
+            TPushSource::Receiver.Push(rec);
+        }
+        TPushSource::Work();
+        TNextReceiver::Work();
+    }
+};
+
+// Tee for a pull chain: every record pulled through Next() is also pushed to a
+// side receiver.
+template <class TRec>
+class TDatNextPNSplitter: public IDatNextReceiver<TRec>, public IDatNextSource<TRec>, public IDatPushSource<TRec> {
+    typedef IDatNextReceiver<TRec> TPullSide;
+    typedef IDatPushSource<TRec> TPushSide;
+
+public:
+    TDatNextPNSplitter(IDatNextSource<TRec>& source, IDatPushReceiver<TRec>& receiver)
+        : TPullSide(source)
+        , IDatNextSource<TRec>()
+        , TPushSide(receiver)
+    {
+    }
+
+    // Returns the next source record after pushing it to the receiver, or 0 when
+    // the source is exhausted (nothing is pushed in that case).
+    const TRec* Next() {
+        const TRec* rec = TPullSide::Source.Next();
+        if (!rec) {
+            return 0;
+        }
+        TPushSide::Receiver.Push(rec);
+        return rec;
+    }
+
+    // Runs Work() on the pull side first and on the push side second.
+    virtual void Work() {
+        TPullSide::Work();
+        TPushSide::Work();
+    }
+};
+
+// Tee for a push chain: every pushed record is forwarded to two receivers.
+// NOTE(review): with the default arguments TOutRecA and TOutRecB are the same
+// type, which makes IDatPushSource<TRec> a direct base twice - confirm that the
+// template is only instantiated with two distinct output types.
+template <class TRec, class TOutRecA = TRec, class TOutRecB = TRec>
+class TDatPushPPSplitter: public IDatPushReceiver<TRec>, public IDatPushSource<TOutRecA>, public IDatPushSource<TOutRecB> {
+    typedef IDatPushSource<TOutRecA> TBranchA;
+    typedef IDatPushSource<TOutRecB> TBranchB;
+
+public:
+    TDatPushPPSplitter(IDatPushReceiver<TOutRecA>& receiverA, IDatPushReceiver<TOutRecB>& receiverB)
+        : TBranchA(receiverA)
+        , TBranchB(receiverB)
+    {
+    }
+
+    // Forwards the record to receiver A, then to receiver B.
+    void Push(const TRec* rec) {
+        TBranchA::Receiver.Push(rec);
+        TBranchB::Receiver.Push(rec);
+    }
+
+    // Runs Work() on branch A, then on branch B.
+    void Work() {
+        TBranchA::Work();
+        TBranchB::Work();
+    }
+};
+
+// TInDatFile exposed as a pull source; remembers the file name so that it can be
+// opened later through Open().
+template <class TRec>
+class TFastInDatFile: public TInDatFile<TRec>, public IDatNextSource<TRec> {
+public:
+    typedef TInDatFile<TRec> Base;
+
+    // Opens the file right away unless `open` is false.
+    TFastInDatFile(const char* name, bool open = true, size_t pages = dbcfg::fbufsize, int pagesOrBytes = 0)
+        : TInDatFile<TRec>(name, pages, pagesOrBytes)
+        , FileName(name)
+    {
+        if (!open) {
+            return;
+        }
+        Base::Open(name);
+    }
+
+    // Opens the file under the name given to the constructor.
+    void Open() {
+        Base::Open(FileName);
+    }
+
+    // Advances from the current record while CompareUids(record, torec) < 0 and
+    // reports whether the record reached compares equal to `torec`.
+    // The incoming value of `inrec` is ignored; it is only used as a cursor.
+    template <class TPassRec>
+    bool PassToUid(const TRec* inrec, const TPassRec* torec) {
+        for (inrec = Base::Current(); inrec && CompareUids(inrec, torec) < 0; inrec = Base::Next()) {
+        }
+        return (inrec && CompareUids(inrec, torec) == 0);
+    }
+
+    // Work() step of the chain: closes the file.
+    void Work() {
+        Base::Close();
+    }
+
+    const TRec* Next() {
+        return Base::Next();
+    }
+
+private:
+    TString FileName;
+};
+
+// TOutDatFile exposed as a push receiver; remembers the file name so that it can
+// be opened later through Open().
+template <class TRec>
+class TPushOutDatFile: public TOutDatFile<TRec>, public IDatPushReceiver<TRec> {
+public:
+    typedef TOutDatFile<TRec> Base;
+
+    // Opens the file right away unless `open` is false.
+    TPushOutDatFile(const char* name, bool open = true)
+        : Base(name, dbcfg::pg_docuid, dbcfg::fbufsize, 0)
+        , FileName(name)
+    {
+        if (open)
+            Base::Open(name);
+    }
+
+    // Opens the file under the name given to the constructor.
+    // c_str() replaces the legacy `~FileName` spelling and passes the same
+    // const char* that the constructor passes to Base::Open().
+    void Open() {
+        Base::Open(FileName.c_str());
+    }
+
+    void Push(const TRec* rec) {
+        Base::Push(rec);
+    }
+
+    // Work() step of the chain: closes the file.
+    void Work() {
+        Base::Close();
+    }
+
+private:
+    TString FileName;
+};
+
+// Pull-to-push adapter that owns its output file: Work() (inherited from
+// IDatNextToPush) drains `source` into File.
+template <class TRec>
+class TNextOutDatFile: public IDatNextToPush<TRec> {
+public:
+ typedef IDatNextToPush<TRec> TBase;
+
+ // TBase is initialized before File, so it receives a reference to a member that
+ // is not constructed yet; the base constructors only store that reference.
+ TNextOutDatFile(const char* name, IDatNextSource<TRec>& source, bool open = true)
+ : TBase(source, File)
+ , File(name, open)
+ {
+ }
+
+ // Opens the output file (for objects constructed with open == false).
+ void Open() {
+ File.Open();
+ }
+
+private:
+ TPushOutDatFile<TRec> File;
+};
+
+// TDatSorterMemo wired into a pull chain: the first Next() call pulls the whole
+// source into the sorter, sorts it, and from then on Next() serves sorted records.
+template <class TVal, template <typename T> class TCompare>
+class TNextDatSorterMemo: public TDatSorterMemo<TVal, TCompare>, public IDatNextChannel<TVal, TVal> {
+    typedef TDatSorterMemo<TVal, TCompare> TImpl;
+    typedef IDatNextChannel<TVal, TVal> TChannel;
+
+public:
+    TNextDatSorterMemo(IDatNextSource<TVal>& source, const char* dir = dbcfg::fname_temp, const char* name = "yet another sorter", size_t memory = dbcfg::small_sorter_size, size_t pagesize = dbcfg::pg_docuid, size_t pages = dbcfg::fbufsize, int pagesOrBytes = 0)
+        : TImpl(name, memory, pagesize, pages, pagesOrBytes)
+        , TChannel(source)
+        , Sorted(false)
+    {
+        TImpl::Open(dir);
+    }
+
+    // Drains the source into the sorter and sorts it.
+    void Sort() {
+        for (const TVal* rec = TChannel::Source.Next(); rec; rec = TChannel::Source.Next()) {
+            TImpl::Push(rec);
+        }
+        TImpl::Sort();
+        Sorted = true;
+    }
+
+    // Returns the next sorted record, sorting lazily on the first call.
+    const TVal* Next() {
+        if (!Sorted) {
+            Sort();
+        }
+        return TImpl::Next();
+    }
+
+private:
+    bool Sorted;
+    TString Dir; // never assigned or read in this class
+};
+
+// Pull-chain stage that turns each TInRec into a TOutRec.
+template <class TInRec, class TOutRec>
+class TDatConverter: public IDatNextChannel<TInRec, TOutRec> {
+    typedef IDatNextChannel<TInRec, TOutRec> TChannel;
+
+public:
+    TDatConverter(IDatNextSource<TInRec>& source)
+        : TChannel(source)
+    {
+    }
+
+    // Default conversion: invokes outrec's call operator with the input record.
+    virtual void Convert(const TInRec& inrec, TOutRec& outrec) {
+        outrec(inrec);
+    }
+
+    // Converts the next source record into the internal buffer and returns its
+    // address (overwritten by the following call); returns 0 at end of input.
+    const TOutRec* Next() {
+        if (const TInRec* rec = TChannel::Source.Next()) {
+            Convert(*rec, CurrentRec);
+            return &CurrentRec;
+        }
+        return 0;
+    }
+
+private:
+    TOutRec CurrentRec;
+};
+
+// Pair of record pointers produced by TNextDatMerger; either pointer is 0 when the
+// corresponding side has no record for the current step. The records are not owned.
+template <class TRecA, class TRecB>
+class TMergeRec {
+public:
+ const TRecA* RecA;
+ const TRecB* RecB;
+};
+
+// Merge modes accepted by TNextDatMerger.
+enum NMergeTypes {
+ MT_JOIN = 0, // emit only pairs whose records compare equal
+ MT_ADD = 1, // emit records of both sides, pairing those that compare equal
+ MT_OVERWRITE = 2, // not handled by TNextDatMerger::Next() in this file
+ MT_TYPENUM // number of merge modes
+};
+
+// Merges two pull sources into a stream of TMergeRec pairs. TCompare<TRecA, TRecB>
+// is called as a three-way comparator (negative / zero / positive).
+// NOTE(review): both sources are presumably ordered consistently with TCompare -
+// confirm against the callers. TRecA and TRecB must be different types, otherwise
+// IDatNextReceiver<TRecA> would be a direct base twice.
+template <class TRecA, class TRecB, template <typename TA, typename TB> class TCompare>
+class TNextDatMerger: public IDatNextReceiver<TRecA>, public IDatNextReceiver<TRecB>, public IDatNextSource<TMergeRec<TRecA, TRecB>> {
+public:
+    TNextDatMerger(IDatNextSource<TRecA>& sourceA, IDatNextSource<TRecB>& sourceB, ui8 mergeType)
+        : IDatNextReceiver<TRecA>(sourceA)
+        , IDatNextReceiver<TRecB>(sourceB)
+        , SourceARec(nullptr)
+        , SourceBRec(nullptr)
+        , MergeType(mergeType)
+        , MoveA(false)
+        , MoveB(false)
+        , NotInit(true)
+    {
+    }
+
+    // Returns the next merged pair, or 0 when the merge is finished.
+    //   MT_ADD:  a record without a partner is returned alone (the other pointer
+    //            is 0); records that compare equal are returned together.
+    //   MT_JOIN: only pairs that compare equal are returned.
+    // Any other merge type yields 0 immediately.
+    const TMergeRec<TRecA, TRecB>* Next() {
+        // Advance the side(s) consumed by the previous call (both on the first call).
+        if (MoveA || NotInit)
+            SourceARec = IDatNextReceiver<TRecA>::Source.Next();
+        if (MoveB || NotInit)
+            SourceBRec = IDatNextReceiver<TRecB>::Source.Next();
+        NotInit = false;
+
+        // Cout << "Next " << SourceARec->HostId << "\t" << SourceBRec->HostId << "\t" << TCompare<TRecA, TRecB>()(SourceARec, SourceBRec) << "\t" << ::compare(SourceARec->HostId, SourceBRec->HostId) << "\t" << ::compare(1, 2) << "\t" << ::compare(2,1) << Endl;
+        // A is behind B (or B is exhausted): emit A alone.
+        if (MergeType == MT_ADD && SourceARec && (!SourceBRec || TCompare<TRecA, TRecB>()(SourceARec, SourceBRec) < 0)) {
+            MergeRec.RecA = SourceARec;
+            MergeRec.RecB = 0;
+            MoveA = true;
+            MoveB = false;
+            return &MergeRec;
+        }
+
+        // B is behind A (or A is exhausted): emit B alone. The comparison must be
+        // "> 0" here; with "< 0" this branch could never fire while A was present,
+        // and the merge stopped at the first A record greater than B.
+        if (MergeType == MT_ADD && SourceBRec && (!SourceARec || TCompare<TRecA, TRecB>()(SourceARec, SourceBRec) > 0)) {
+            MergeRec.RecA = 0;
+            MergeRec.RecB = SourceBRec;
+            MoveA = false;
+            MoveB = true;
+            return &MergeRec;
+        }
+
+        // Both sides compare equal: emit them as a pair.
+        if (MergeType == MT_ADD && SourceARec && SourceBRec && TCompare<TRecA, TRecB>()(SourceARec, SourceBRec) == 0) {
+            MergeRec.RecA = SourceARec;
+            MergeRec.RecB = SourceBRec;
+            MoveA = true;
+            MoveB = true;
+            return &MergeRec;
+        }
+
+        // MT_JOIN: skip records on whichever side is behind until both compare
+        // equal or one side runs out.
+        while (MergeType == MT_JOIN && SourceARec && SourceBRec && TCompare<TRecA, TRecB>()(SourceARec, SourceBRec) != 0) {
+            while (SourceARec && TCompare<TRecA, TRecB>()(SourceARec, SourceBRec) < 0) {
+                SourceARec = IDatNextReceiver<TRecA>::Source.Next();
+            }
+            while (SourceARec && SourceBRec && TCompare<TRecA, TRecB>()(SourceARec, SourceBRec) > 0) {
+                SourceBRec = IDatNextReceiver<TRecB>::Source.Next();
+            }
+        }
+
+        if (MergeType == MT_JOIN && SourceARec && SourceBRec) {
+            MergeRec.RecA = SourceARec;
+            MergeRec.RecB = SourceBRec;
+            MoveA = true;
+            MoveB = true;
+            return &MergeRec;
+        }
+
+        MergeRec.RecA = 0;
+        MergeRec.RecB = 0;
+        return 0;
+    }
+
+    // Runs Work() on source A, then on source B.
+    void Work() {
+        IDatNextReceiver<TRecA>::Source.Work();
+        IDatNextReceiver<TRecB>::Source.Work();
+    }
+
+private:
+    TMergeRec<TRecA, TRecB> MergeRec; // pair handed out by Next()
+    const TRecA* SourceARec;          // current record of source A
+    const TRecB* SourceBRec;          // current record of source B
+    ui8 MergeType;
+    bool MoveA;                       // advance A at the start of the next call
+    bool MoveB;                       // advance B at the start of the next call
+    bool NotInit;                     // true until the first Next() call
+};
+
+/*template<class TRec, class TSource, template <typename T> class TCompare, class TReceiver = TPushOutDatFile<TRec> >
+class TPushDatMerger {
+public:
+ TPushDatMerger(TSource& source, TReceiver& receiver, ui8 mergeType)
+ : Source(source)
+ , Receiver(receiver)
+ , MergeType(mergeType)
+ {
+ }
+
+ virtual void Init() {
+ SourceRec = Source.Next();
+ }
+
+ virtual void Push(const TRec* rec) {
+ while (SourceRec && TCompare<TRec>()(SourceRec, rec, 0) < 0) {
+ if (MergeType == MT_OVERWRITE || MergeType == MT_ADD)
+ Receiver.Push(SourceRec);
+ SourceRec = Source.Next();
+ }
+
+ bool intersected = false;
+ while (SourceRec && TCompare<TRec>()(SourceRec, rec, 0) == 0) {
+ intersected = true;
+ if (MergeType == MT_ADD)
+ Receiver.Push(SourceRec);
+ SourceRec = Source.Next();
+ }
+
+ if (intersected && MergeType == MT_JOIN)
+ Receiver.Push(rec);
+
+ if (MergeType == MT_OVERWRITE || MergeType == MT_ADD)
+ Receiver.Push(rec);
+ }
+
+ virtual void Term() {
+ if (MergeType == MT_OVERWRITE || MergeType == MT_ADD) {
+ while (SourceRec) {
+ Receiver.Push(SourceRec);
+ SourceRec = Source.Next();
+ }
+ }
+ }
+
+private:
+ TSource& Source;
+ const TRec* SourceRec;
+ TReceiver& Receiver;
+ ui8 MergeType;
+};*/
+
+/*template <class TRec, class TSourceA, class TSourceB, template <typename T> class TCompare, class TReceiver = TPushOutDatFile<TRec> >
+class TNextDatMerger: public TPushDatMerger<TRec, TSourceA, TCompare, TReceiver> {
+ typedef TPushDatMerger<TRec, TSourceA, TCompare, TReceiver> TImpl;
+public:
+ TNextDatMerger(TSourceA& sourceA, TSourceB& sourceB, TReceiver& receiver, ui8 mergeType)
+ : TImpl(sourceA, receiver, mergeType)
+ , SourceB(sourceB)
+ {
+ }
+
+ virtual void Work() {
+ TImpl::Init();
+ while (SourceBRec = SourceB.Next()) {
+ TImpl::Push(SourceBRec);
+ }
+ TImpl::Term();
+ }
+private:
+ TSourceB& SourceB;
+ const TRec* SourceBRec;
+};*/
+
+/*template <class TRec, template <typename T> class TCompare, class TReceiver = TPushOutDatFile<TRec> >
+class TFilePushDatMerger: public TPushDatMerger<TRec, TFastInDatFile<TRec>, TCompare, TReceiver> {
+ typedef TPushDatMerger<TRec, TFastInDatFile<TRec>, TCompare, TReceiver> TImpl;
+public:
+ TFilePushDatMerger(const char* name, TReceiver& receiver, ui8 mergeType)
+ : TImpl(SourceFile, receiver, mergeType)
+ , SourceFile(name)
+ {
+ }
+
+ virtual void Push(const TRec* rec) {
+ TImpl::Push(rec);
+ }
+
+ virtual void Term() {
+ TImpl::Term();
+ }
+private:
+ TFastInDatFile<TRec> SourceFile;
+};*/
+
+/*template <class TRec, template <typename T> class TCompare, class TReceiver = TPushOutDatFile<TRec> >
+class TFileNextDatMerger: public TNextDatMerger<TRec, TFastInDatFile<TRec>, TFastInDatFile<TRec>, TCompare, TReceiver> {
+ typedef TNextDatMerger<TRec, TFastInDatFile<TRec>, TFastInDatFile<TRec>, TCompare, TReceiver> TImpl;
+public:
+ TFileNextDatMerger(const char* sourceAname, const char* sourceBname, TReceiver& receiver, ui8 mergeType)
+ : TImpl(FileA, FileB, receiver, mergeType)
+ , FileA(sourceAname)
+ , FileB(sourceBname)
+ {
+ }
+
+ virtual void Work() {
+ TImpl::Work();
+ }
+private:
+ TFastInDatFile<TRec> FileA;
+ TFastInDatFile<TRec> FileB;
+};*/
+
+// Pull-chain filter: lets through only the records accepted by Check().
+template <class TRec, template <typename T> class TPredicate>
+class TDatNextFilter: public IDatNextChannel<TRec, TRec> {
+    typedef IDatNextChannel<TRec, TRec> TChannel;
+
+public:
+    TDatNextFilter(IDatNextSource<TRec>& source)
+        : TChannel(source)
+    {
+    }
+
+    // Returns the next source record accepted by Check(), or 0 at end of input.
+    virtual const TRec* Next() {
+        for (;;) {
+            const TRec* rec = TChannel::Source.Next();
+            if (rec == 0) {
+                return 0;
+            }
+            if (Check(rec)) {
+                return rec;
+            }
+        }
+    }
+
+protected:
+    // Acceptance test; the default applies a default-constructed TPredicate<TRec>.
+    virtual bool Check(const TRec* rec) {
+        TPredicate<TRec> predicate;
+        return predicate(rec);
+    }
+};
+
+// Push-chain filter: forwards only the records accepted by Check().
+template <class TRec, template <typename T> class TPredicate>
+class TDatPushFilter: public IDatPushChannel<TRec, TRec> {
+    typedef IDatPushChannel<TRec, TRec> TChannel;
+
+public:
+    TDatPushFilter(IDatPushReceiver<TRec>& receiver)
+        : TChannel(receiver)
+    {
+    }
+
+    // Passes the record on to the receiver when Check() accepts it.
+    virtual void Push(const TRec* rec) {
+        if (!Check(rec)) {
+            return;
+        }
+        TChannel::Receiver.Push(rec);
+    }
+
+private:
+    // Acceptance test; the default applies a default-constructed TPredicate<TRec>.
+    virtual bool Check(const TRec* rec) {
+        TPredicate<TRec> predicate;
+        return predicate(rec);
+    }
+};
+
+// Pull-chain stage that groups consecutive input records: a new group starts
+// whenever TCompare<TInRec>()(current, previous, 0) is non-zero. Subclasses
+// implement OnStart/OnRecord/OnFinish and set HasOutput after filling OutRec to
+// emit a result.
+template <class TInRec, class TOutRec, template <typename T> class TCompare>
+class TDatGrouper: public IDatNextChannel<TInRec, TOutRec> {
+public:
+    TDatGrouper(IDatNextSource<TInRec>& source)
+        : IDatNextChannel<TInRec, TOutRec>(source)
+        , CurrentRec(nullptr)
+        , LastRec(nullptr)
+        , Begin(true)
+        , Finish(false)
+        , HasOutput(false)
+    {
+    }
+
+    // Consumes input until a hook reports output and returns &OutRec, or returns 0
+    // once the input is exhausted and nothing is pending.
+    // OnFinish() runs exactly once after the end of input, even when the source
+    // was empty; LastRec is then still null (it is initialized for that reason)
+    // and CurrentRec is null as well.
+    // NOTE(review): LastRec keeps a pointer returned by an earlier Source.Next()
+    // call - confirm that the source keeps that record valid across calls.
+    const TOutRec* Next() {
+        while ((CurrentRec = IDatNextChannel<TInRec, TOutRec>::Source.Next()) != nullptr) {
+            if (Begin) {
+                Begin = false;
+                OnStart();
+            } else if (TCompare<TInRec>()(CurrentRec, LastRec, 0) != 0) {
+                // Group boundary: close the previous group and open a new one.
+                OnFinish();
+                OnStart();
+            }
+            OnRecord();
+            LastRec = CurrentRec;
+            if (HasOutput) {
+                HasOutput = false;
+                return &OutRec;
+            }
+        }
+        if (!Finish)
+            OnFinish();
+        Finish = true;
+        if (HasOutput) {
+            HasOutput = false;
+            return &OutRec;
+        }
+        return 0;
+    }
+
+protected:
+    virtual void OnStart() = 0;  // called before the first record of a group
+    virtual void OnRecord() = 0; // called for every record (CurrentRec)
+    virtual void OnFinish() = 0; // called after the last record of a group
+
+    const TInRec* CurrentRec;
+    const TInRec* LastRec;
+    TOutRec OutRec;
+
+    bool Begin;
+    bool Finish;
+    bool HasOutput;
+};
diff --git a/library/cpp/microbdb/ya.make b/library/cpp/microbdb/ya.make
new file mode 100644
index 0000000000..3e553f8535
--- /dev/null
+++ b/library/cpp/microbdb/ya.make
@@ -0,0 +1,36 @@
+# Build description of the library/cpp/microbdb library.
+LIBRARY()
+
+# Headers are listed next to the .cpp and .proto sources.
+SRCS(
+ align.h
+ compressed.h
+ extinfo.h
+ file.cpp
+ hashes.h
+ header.h
+ header.cpp
+ heap.h
+ input.h
+ microbdb.cpp
+ noextinfo.proto
+ output.h
+ powersorter.h
+ reader.h
+ safeopen.h
+ sorter.h
+ sorterdef.h
+ utility.h
+ wrappers.h
+)
+
+# Libraries this one depends on.
+PEERDIR(
+ contrib/libs/fastlz
+ contrib/libs/libc_compat
+ contrib/libs/protobuf
+ contrib/libs/snappy
+ contrib/libs/zlib
+ library/cpp/deprecated/fgood
+ library/cpp/on_disk/st_hash
+ library/cpp/packedtypes
+)
+
+END()
diff --git a/library/cpp/on_disk/CMakeLists.txt b/library/cpp/on_disk/CMakeLists.txt
index 4202947169..ade3b33c9a 100644
--- a/library/cpp/on_disk/CMakeLists.txt
+++ b/library/cpp/on_disk/CMakeLists.txt
@@ -7,3 +7,4 @@
add_subdirectory(chunks)
+add_subdirectory(st_hash)
diff --git a/library/cpp/on_disk/st_hash/CMakeLists.darwin-x86_64.txt b/library/cpp/on_disk/st_hash/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..ad332fef62
--- /dev/null
+++ b/library/cpp/on_disk/st_hash/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,18 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# darwin-x86_64 variant of the st_hash library target.
+add_library(cpp-on_disk-st_hash)
+# PUBLIC: these dependencies are also applied to targets that link this library.
+target_link_libraries(cpp-on_disk-st_hash PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ cpp-deprecated-mapped_file
+)
+# fake.cpp is the only compiled source; it just includes the library headers.
+target_sources(cpp-on_disk-st_hash PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/on_disk/st_hash/fake.cpp
+)
diff --git a/library/cpp/on_disk/st_hash/CMakeLists.linux-aarch64.txt b/library/cpp/on_disk/st_hash/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..737875ca6c
--- /dev/null
+++ b/library/cpp/on_disk/st_hash/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# linux-aarch64 variant of the st_hash library target.
+add_library(cpp-on_disk-st_hash)
+# PUBLIC: these dependencies are also applied to targets that link this library.
+target_link_libraries(cpp-on_disk-st_hash PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ cpp-deprecated-mapped_file
+)
+# fake.cpp is the only compiled source; it just includes the library headers.
+target_sources(cpp-on_disk-st_hash PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/on_disk/st_hash/fake.cpp
+)
diff --git a/library/cpp/on_disk/st_hash/CMakeLists.linux-x86_64.txt b/library/cpp/on_disk/st_hash/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..737875ca6c
--- /dev/null
+++ b/library/cpp/on_disk/st_hash/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# linux-x86_64 variant of the st_hash library target.
+add_library(cpp-on_disk-st_hash)
+# PUBLIC: these dependencies are also applied to targets that link this library.
+target_link_libraries(cpp-on_disk-st_hash PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ cpp-deprecated-mapped_file
+)
+# fake.cpp is the only compiled source; it just includes the library headers.
+target_sources(cpp-on_disk-st_hash PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/on_disk/st_hash/fake.cpp
+)
diff --git a/library/cpp/on_disk/st_hash/CMakeLists.txt b/library/cpp/on_disk/st_hash/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/library/cpp/on_disk/st_hash/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Include the platform-specific target description that matches the system being
+# built for. There is no else() branch: on any other platform nothing is included
+# and the cpp-on_disk-st_hash target is not defined.
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/library/cpp/on_disk/st_hash/CMakeLists.windows-x86_64.txt b/library/cpp/on_disk/st_hash/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..ad332fef62
--- /dev/null
+++ b/library/cpp/on_disk/st_hash/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,18 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# windows-x86_64 variant of the st_hash library target.
+add_library(cpp-on_disk-st_hash)
+# PUBLIC: these dependencies are also applied to targets that link this library.
+target_link_libraries(cpp-on_disk-st_hash PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ cpp-deprecated-mapped_file
+)
+# fake.cpp is the only compiled source; it just includes the library headers.
+target_sources(cpp-on_disk-st_hash PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/on_disk/st_hash/fake.cpp
+)
diff --git a/library/cpp/on_disk/st_hash/fake.cpp b/library/cpp/on_disk/st_hash/fake.cpp
new file mode 100644
index 0000000000..ef5af4d432
--- /dev/null
+++ b/library/cpp/on_disk/st_hash/fake.cpp
@@ -0,0 +1,4 @@
+#include "save_stl.h"
+#include "static_hash.h"
+#include "static_hash_map.h"
+#include "sthash_iterators.h"
diff --git a/library/cpp/on_disk/st_hash/save_stl.h b/library/cpp/on_disk/st_hash/save_stl.h
new file mode 100644
index 0000000000..00f8f0e20d
--- /dev/null
+++ b/library/cpp/on_disk/st_hash/save_stl.h
@@ -0,0 +1,84 @@
+#pragma once
+
+#include <util/generic/hash.h>
+#include <util/system/yassert.h>
+#include <util/stream/output.h>
+
+/* Header of a saved hash-table image; this structure might be replaced with sthashtable class.
+ * THashTable::save_for_st fills one instance and writes it with a single raw
+ * stream->Write(&sv, sizeof(sv)). The fields are declared in the same order as the data
+ * members of sthashtable (hasher, key-equality, bucket count, element count, end offset).
+ */
+template <class HF, class Eq, class size_type>
+struct sthashtable_nvm_sv {
+ sthashtable_nvm_sv() {
+ if (sizeof(sthashtable_nvm_sv) != sizeof(HF) + sizeof(Eq) + 3 * sizeof(size_type)) { // object size differs from the sum of its members (e.g. padding)
+ memset(this, 0, sizeof(sthashtable_nvm_sv)); // give bytes that no member covers a defined value before the object is written out raw
+ }
+ }
+
+ sthashtable_nvm_sv(const HF& phf, const Eq& peq, const size_type& pnb, const size_type& pne, const size_type& pnd)
+ : sthashtable_nvm_sv() // delegate first so the optional memset runs before the members are assigned
+ {
+ hf = phf;
+ eq = peq;
+ num_buckets = pnb;
+ num_elements = pne;
+ data_end_off = pnd;
+ }
+
+ HF hf;
+ Eq eq;
+ size_type num_buckets;
+ size_type num_elements;
+ size_type data_end_off; // save_for_st stores here the offset one past the last record, counted from the start of the header
+};
+
+/**
+ * Some hack to save both THashMap and sthash.
+ * Working with stHash does not depend on the template parameters, because the content of stHash is not used inside this method.
+ *
+ * Writes to `stream`, in this order:
+ *   1. one sthashtable_nvm_sv header (hasher, key-equality, bucket count, element count, end offset);
+ *   2. buckets.size() + 1 offsets of type KeySaver::TSizeType, the last one being the end mark;
+ *   3. for every bucket: this table's records written by ks.SaveRecord, followed by the raw
+ *      bytes of the same bucket of `stHash` when `stHash` is not null.
+ * Offsets are counted from the start of the header; the first record offset is
+ * sizeof(header) + (num_buckets + 1) * sizeof(TSizeType).
+ * Always returns 0.
+ */
+template <class V, class K, class HF, class Ex, class Eq, class A>
+template <class KeySaver>
+inline int THashTable<V, K, HF, Ex, Eq, A>::save_for_st(IOutputStream* stream, KeySaver& ks, sthash<int, int, THash<int>, TEqualTo<int>, typename KeySaver::TSizeType>* stHash) const {
+ Y_ASSERT(!stHash || stHash->bucket_count() == bucket_count()); // buckets are merged one-to-one, so both tables need the same bucket count
+ typedef sthashtable_nvm_sv<HF, Eq, typename KeySaver::TSizeType> sv_type;
+ sv_type sv = {this->_get_hash_fun(), this->_get_key_eq(), static_cast<typename KeySaver::TSizeType>(buckets.size()), static_cast<typename KeySaver::TSizeType>(num_elements), 0};
+ // to do: m.b. use just the size of corresponding object?
+ typename KeySaver::TSizeType cur_off = sizeof(sv_type) +
+ (sv.num_buckets + 1) * sizeof(typename KeySaver::TSizeType);
+ sv.data_end_off = cur_off; // start at the first record offset, then add the size of every record below
+ const_iterator n;
+ for (n = begin(); n != end(); ++n) {
+ sv.data_end_off += static_cast<typename KeySaver::TSizeType>(ks.GetRecordSize(*n));
+ }
+ typename KeySaver::TSizeType* sb = stHash ? (typename KeySaver::TSizeType*)(stHash->buckets()) : nullptr; // offset table of the table being merged in, if any
+ if (stHash)
+ sv.data_end_off += static_cast<typename KeySaver::TSizeType>(sb[buckets.size()] - sb[0]); // total record bytes of stHash
+ //saver.Align(sizeof(char*));
+ stream->Write(&sv, sizeof(sv));
+
+ size_type i;
+ //save vector
+ for (i = 0; i < buckets.size(); ++i) {
+ node* cur = buckets[i];
+ stream->Write(&cur_off, sizeof(cur_off)); // offset at which bucket i starts
+ if (cur) {
+ while (!((uintptr_t)cur & 1)) { // walk the chain until a link with its low bit set is reached
+ cur_off += static_cast<typename KeySaver::TSizeType>(ks.GetRecordSize(cur->val));
+ cur = cur->next;
+ }
+ }
+ if (stHash)
+ cur_off += static_cast<typename KeySaver::TSizeType>(sb[i + 1] - sb[i]); // bytes bucket i of stHash will add
+ }
+ stream->Write(&cur_off, sizeof(cur_off)); // end mark
+ for (i = 0; i < buckets.size(); ++i) { // second pass: emit the records in the same order the offsets were computed
+ node* cur = buckets[i];
+ if (cur) {
+ while (!((uintptr_t)cur & 1)) {
+ ks.SaveRecord(stream, cur->val);
+ cur = cur->next;
+ }
+ }
+ if (stHash)
+ stream->Write((const char*)stHash + sb[i], sb[i + 1] - sb[i]); // copy bucket i of stHash verbatim; sb[] offsets are relative to stHash itself
+ }
+ return 0;
+}
diff --git a/library/cpp/on_disk/st_hash/static_hash.h b/library/cpp/on_disk/st_hash/static_hash.h
new file mode 100644
index 0000000000..ca7a6ccd36
--- /dev/null
+++ b/library/cpp/on_disk/st_hash/static_hash.h
@@ -0,0 +1,420 @@
+#pragma once
+
+#include "save_stl.h"
+#include "sthash_iterators.h"
+
+#include <util/generic/hash.h>
+#include <util/generic/vector.h>
+#include <util/generic/buffer.h>
+#include <util/generic/cast.h>
+#include <util/generic/yexception.h> // for save/load only
+#include <util/stream/file.h>
+#include <util/stream/buffer.h>
+#include <utility>
+
+#include <memory>
+#include <algorithm>
+#include <functional>
+
+#include <cstdlib>
+#include <cstddef>
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4624) // 'destructor could not be generated because a base class destructor is inaccessible'
+#endif
+
+// Serializes `hash` into `stream` with a default-constructed KeySaver.
+// Throws yexception when save_for_st reports a non-zero result.
+template <class HashType, class KeySaver>
+inline void SaveHashToStreamEx(HashType& hash, IOutputStream* stream) {
+    KeySaver saver;
+    const auto status = hash.save_for_st(stream, saver);
+    if (status) {
+        ythrow yexception() << "Could not save hash to stream";
+    }
+}
+
+// Saves `hash` to `stream` using TSthashWriter with ui64 as the size type.
+template <class HashType>
+inline void SaveHashToStream(HashType& hash, IOutputStream* stream) {
+    using TKey = typename HashType::key_type;
+    using TMapped = typename HashType::mapped_type;
+    SaveHashToStreamEx<HashType, TSthashWriter<TKey, TMapped, ui64>>(hash, stream);
+}
+
+// Opens `fileName` as a TFileOutput and saves `hash` into it with the given KeySaver.
+template <class HashType, class KeySaver>
+inline void SaveHashToFileEx(HashType& hash, const char* fileName) {
+    TFileOutput fileStream(fileName);
+    IOutputStream* const sink = &fileStream;
+    SaveHashToStreamEx<HashType, KeySaver>(hash, sink);
+}
+
+// Saves a map-like `hash` to `fileName` using TSthashWriter with ui64 as the size type.
+template <class HashType>
+inline void SaveHashToFile(HashType& hash, const char* fileName) {
+    using TKey = typename HashType::key_type;
+    using TMapped = typename HashType::mapped_type;
+    SaveHashToFileEx<HashType, TSthashWriter<TKey, TMapped, ui64>>(hash, fileName);
+}
+
+// Saves a set-like `hash` to `fileName` using TSthashSetWriter with ui64 as the size type.
+template <class HashType>
+inline void SaveHashSetToFile(HashType& hash, const char* fileName) {
+    using TKey = typename HashType::key_type;
+    SaveHashToFileEx<HashType, TSthashSetWriter<TKey, ui64>>(hash, fileName);
+}
+
+// Same as SaveHashToFile, but the writer uses ui32 as the size type.
+template <class HashType>
+inline void SaveHashToFile32(HashType& hash, const char* fileName) {
+    using TKey = typename HashType::key_type;
+    using TMapped = typename HashType::mapped_type;
+    SaveHashToFileEx<HashType, TSthashWriter<TKey, TMapped, ui32>>(hash, fileName);
+}
+
+// Serializes `hash` into `buffer` through a TBufferOutput. `stHash`, when given, is passed
+// on to save_for_st. Throws yexception when save_for_st reports a non-zero result.
+template <class HashType, class KeySaver>
+inline void SaveHashToBufferEx(HashType& hash, TBuffer& buffer, sthash<int, int, THash<int>, TEqualTo<int>, typename KeySaver::TSizeType>* stHash = nullptr) {
+    TBufferOutput sink(buffer);
+    KeySaver saver;
+    const auto status = hash.save_for_st(&sink, saver, stHash);
+    if (status) {
+        ythrow yexception() << "Could not save hash to memory";
+    }
+}
+
+// Saves `hash` into `buffer` using TSthashWriter with ui64 as the size type.
+template <class HashType>
+inline void SaveHashToBuffer(HashType& hash, TBuffer& buffer) {
+    using TKey = typename HashType::key_type;
+    using TMapped = typename HashType::mapped_type;
+    SaveHashToBufferEx<HashType, TSthashWriter<TKey, TMapped, ui64>>(hash, buffer);
+}
+
+/**
+ * Hack that saves a THashMap together with an already built sthash into one buffer.
+ * Both tables must report the same bucket_count().
+ * `stHash` is reinterpreted as sthash<int, int, ...> only to match the parameter type
+ * expected by SaveHashToBufferEx.
+ */
+template <class HashType, class StHashType>
+inline void SaveHashToBuffer(HashType& hash, TBuffer& buffer, StHashType* stHash) {
+    using TSaver = TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64>;
+    using TErasedStHash = sthash<int, int, THash<int>, TEqualTo<int>, typename TSaver::TSizeType>;
+
+    TErasedStHash* const erased = reinterpret_cast<TErasedStHash*>(stHash);
+    SaveHashToBufferEx<HashType, TSaver>(hash, buffer, erased);
+}
+
+// Same as SaveHashToBuffer(hash, buffer), but the writer uses ui32 as the size type.
+template <class HashType>
+inline void SaveHashToBuffer32(HashType& hash, TBuffer& buffer) {
+    using TKey = typename HashType::key_type;
+    using TMapped = typename HashType::mapped_type;
+    SaveHashToBufferEx<HashType, TSthashWriter<TKey, TMapped, ui32>>(hash, buffer);
+}
+
+// Read-only hash table that lives inside a flat memory image (for example a mapped file).
+//
+// The object is never constructed: a pointer to the start of the image is cast to
+// sthashtable*. The data members form the image header, the table of bucket offsets
+// starts right after the header (see buckets()), and every offset is counted from `this`.
+//
+// Iter is the record iterator type; it supplies TKeyType, TValueType, THasherType and
+// TKeyEqualType, construction from `const char*`, operator++, operator!= and KeyEquals().
+template <class Iter, typename size_type_f = ui64>
+class sthashtable {
+public:
+    typedef typename Iter::TKeyType key_type;
+    typedef typename Iter::TValueType value_type;
+    typedef typename Iter::THasherType hasher;
+    typedef typename Iter::TKeyEqualType key_equal;
+
+    typedef size_type_f size_type;
+    typedef ptrdiff_t difference_type;
+    typedef const value_type* const_pointer;
+    typedef const value_type& const_reference;
+
+    typedef Iter const_iterator;
+
+    const hasher hash_funct() const {
+        return hash;
+    }
+    const key_equal key_eq() const {
+        return equals;
+    }
+
+private:
+    // Header fields. Their order matches sthashtable_nvm_sv, which is what gets written
+    // to the image, so they must not be reordered.
+    const hasher hash;
+    const key_equal equals;
+
+private:
+    // Iterator positioned at the first record of `bucket`; `bucket` must be < num_buckets.
+    const_iterator iter_at_bucket(size_type bucket) const {
+        return (const_iterator)(((char*)this + buckets()[bucket]));
+    }
+
+    // Same as iter_at_bucket, but yields end() for a bucket index past the last bucket.
+    const_iterator iter_at_bucket_or_end(size_type bucket) const {
+        if (bucket < num_buckets)
+            return (const_iterator)(((char*)this + buckets()[bucket]));
+        else
+            return end();
+    }
+
+    const size_type num_buckets;
+    const size_type num_elements;
+    const size_type data_end_off;
+
+protected: //shut up gcc warning
+    // we can't construct/destroy this object at all!
+    sthashtable();
+    sthashtable(const sthashtable& ht);
+    ~sthashtable();
+
+public:
+    // Table of bucket offsets, stored immediately after the header.
+    const size_type* buckets() const {
+        return (size_type*)((char*)this + sizeof(*this));
+    }
+    const size_type buckets(size_type n) const {
+        return buckets()[n];
+    }
+
+    size_type size() const {
+        return num_elements;
+    }
+    size_type max_size() const {
+        return size_type(-1);
+    }
+    bool empty() const {
+        return size() == 0;
+    }
+
+    const_iterator begin() const {
+        return num_buckets ? iter_at_bucket(0) : end();
+    }
+
+    const_iterator end() const {
+        return (const_iterator)(((char*)this + data_end_off));
+    }
+
+public:
+    // Offset of the end of the record area, counted from the start of the header.
+    size_type size_in_bytes() const {
+        return data_end_off;
+    }
+
+    size_type bucket_count() const {
+        return num_buckets;
+    }
+
+    // The sthash / sthash_mm wrappers forward max_bucket_count() here. The image is
+    // immutable, so the bucket count can never exceed its current value.
+    size_type max_bucket_count() const {
+        return num_buckets;
+    }
+
+    size_type elems_in_bucket(size_type bucket) const {
+        size_type result = 0;
+        const_iterator first = iter_at_bucket(bucket);
+        const_iterator last = iter_at_bucket_or_end(bucket + 1);
+
+        for (; first != last; ++first)
+            ++result;
+        return result;
+    }
+
+    // Returns an iterator to the first record whose key equals `key`, or end().
+    template <class TheKey>
+    const_iterator find(const TheKey& key) const {
+        if (num_buckets == 0)
+            return end(); // bkt_num_key would otherwise compute hash % 0
+        size_type n = bkt_num_key(key);
+        const_iterator first(iter_at_bucket(n)), last(iter_at_bucket_or_end(n + 1));
+        for (;
+             first != last && !first.KeyEquals(equals, key);
+             ++first) {
+        }
+        if (first != last)
+            return first;
+        return end();
+    }
+
+    // Number of records in the key's bucket whose key equals `key`.
+    size_type count(const key_type& key) const {
+        if (num_buckets == 0)
+            return 0; // bkt_num_key would otherwise compute hash % 0
+        const size_type n = bkt_num_key(key);
+        size_type result = 0;
+        const_iterator first = iter_at_bucket(n);
+        const_iterator last = iter_at_bucket_or_end(n + 1);
+
+        for (; first != last; ++first)
+            if (first.KeyEquals(equals, key))
+                ++result;
+        return result;
+    }
+
+    std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const;
+
+private:
+    // Bucket index for `key`; callers must make sure num_buckets is not zero.
+    template <class TheKey>
+    size_type bkt_num_key(const TheKey& key) const {
+        return hash(key) % num_buckets;
+    }
+};
+
+// Returns the range [first, last) of consecutive records in the key's bucket whose keys
+// equal `key`, starting at the first match; returns (end(), end()) when there is none.
+template <class I, class size_type_f>
+std::pair<I, I> sthashtable<I, size_type_f>::equal_range(const key_type& key) const {
+    typedef std::pair<const_iterator, const_iterator> pii;
+    if (num_buckets == 0) {
+        // No buckets at all: bkt_num_key would compute hash % 0.
+        return pii(end(), end());
+    }
+    const size_type n = bkt_num_key(key);
+    const_iterator first = iter_at_bucket(n);
+    const_iterator last = iter_at_bucket_or_end(n + 1);
+
+    for (; first != last; ++first) {
+        if (first.KeyEquals(equals, key)) {
+            // Extend the range over the run of equal keys that follows the first match.
+            const_iterator cur = first;
+            ++cur;
+            for (; cur != last; ++cur)
+                if (!cur.KeyEquals(equals, key))
+                    return pii(const_iterator(first),
+                               const_iterator(cur));
+            return pii(const_iterator(first),
+                       const_iterator(last));
+        }
+    }
+    return pii(end(), end());
+}
+
+/* end __SGI_STL_HASHTABLE_H */
+
+// Read-only map facade over a saved hash image. It holds a single sthashtable whose
+// iterator is TSthashIterator<const Key, const T, ...>; every member forwards to it.
+template <class Key, class T, class HashFcn /*= hash<Key>*/,
+          class EqualKey = TEqualTo<Key>, typename size_type_f = ui64>
+class sthash {
+private:
+    using ht = sthashtable<TSthashIterator<const Key, const T, HashFcn, EqualKey>, size_type_f>;
+    ht rep;
+
+public:
+    using key_type = typename ht::key_type;
+    using value_type = typename ht::value_type;
+    using hasher = typename ht::hasher;
+    using key_equal = typename ht::key_equal;
+    using mapped_type = T;
+
+    using size_type = typename ht::size_type;
+    using difference_type = typename ht::difference_type;
+    using const_pointer = typename ht::const_pointer;
+    using const_reference = typename ht::const_reference;
+
+    using const_iterator = typename ht::const_iterator;
+
+    // Functors stored in the image header.
+    const hasher hash_funct() const { return rep.hash_funct(); }
+    const key_equal key_eq() const { return rep.key_eq(); }
+
+public:
+    // Size queries.
+    size_type size() const { return rep.size(); }
+    size_type max_size() const { return rep.max_size(); }
+    bool empty() const { return rep.empty(); }
+
+    // Iteration over all records.
+    const_iterator begin() const { return rep.begin(); }
+    const_iterator end() const { return rep.end(); }
+
+public:
+    // Lookup. find() and has() accept any key type the hasher and comparator accept.
+    template <class TheKey>
+    const_iterator find(const TheKey& key) const {
+        return rep.find(key);
+    }
+    template <class TheKey>
+    bool has(const TheKey& key) const {
+        const const_iterator hit = rep.find(key);
+        return hit != rep.end();
+    }
+
+    size_type count(const key_type& key) const { return rep.count(key); }
+
+    std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const {
+        return rep.equal_range(key);
+    }
+
+    // Image and bucket introspection.
+    size_type size_in_bytes() const { return rep.size_in_bytes(); }
+
+    size_type bucket_count() const { return rep.bucket_count(); }
+    size_type max_bucket_count() const { return rep.max_bucket_count(); }
+    size_type elems_in_bucket(size_type n) const { return rep.elems_in_bucket(n); }
+
+    const size_type* buckets() const { return rep.buckets(); }
+    const size_type buckets(size_type n) const { return rep.buckets(n); }
+};
+
+template <class Key, class HashFcn,
+ class EqualKey = TEqualTo<Key>, typename size_type_f = ui64>
+class sthash_set: public sthash<Key, TEmptyValue, HashFcn, EqualKey, size_type_f> { // set flavour: an sthash whose mapped type is TEmptyValue
+ typedef sthash<Key, TEmptyValue, HashFcn, EqualKey, size_type_f> Base;
+
+public:
+ using Base::const_iterator; // the using-declarations below re-export the nested type names of the base
+ using Base::hasher;
+ using Base::key_equal;
+ using Base::key_type;
+ using Base::size_type;
+ using Base::value_type; // NOTE(review): Base is a dependent type and these have no `typename` - confirm they are not needed as type names inside this template
+};
+
+// Read-only facade over a saved hash image, like sthash, except that the iterator is
+// TSthashIterator<const Key, T, ...> (non-const mapped type) and find() takes key_type only.
+template <class Key, class T, class HashFcn /*= hash<Key>*/,
+          class EqualKey = TEqualTo<Key>, typename size_type_f = ui64>
+class sthash_mm {
+private:
+    using ht = sthashtable<TSthashIterator<const Key, T, HashFcn, EqualKey>, size_type_f>;
+    ht rep;
+
+public:
+    using key_type = typename ht::key_type;
+    using value_type = typename ht::value_type;
+    using hasher = typename ht::hasher;
+    using key_equal = typename ht::key_equal;
+    using mapped_type = T;
+
+    using size_type = typename ht::size_type;
+    using difference_type = typename ht::difference_type;
+    using const_pointer = typename ht::const_pointer;
+    using const_reference = typename ht::const_reference;
+
+    using const_iterator = typename ht::const_iterator;
+
+    // Functors stored in the image header.
+    const hasher hash_funct() const { return rep.hash_funct(); }
+    const key_equal key_eq() const { return rep.key_eq(); }
+
+public:
+    // Size queries.
+    size_type size() const { return rep.size(); }
+    size_type max_size() const { return rep.max_size(); }
+    bool empty() const { return rep.empty(); }
+
+    // Iteration over all records.
+    const_iterator begin() const { return rep.begin(); }
+    const_iterator end() const { return rep.end(); }
+
+    // Lookup.
+    const_iterator find(const key_type& key) const { return rep.find(key); }
+
+    size_type count(const key_type& key) const { return rep.count(key); }
+
+    std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const {
+        return rep.equal_range(key);
+    }
+
+    // Bucket introspection.
+    size_type bucket_count() const { return rep.bucket_count(); }
+    size_type max_bucket_count() const { return rep.max_bucket_count(); }
+    size_type elems_in_bucket(size_type n) const { return rep.elems_in_bucket(n); }
+};
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
diff --git a/library/cpp/on_disk/st_hash/static_hash_map.h b/library/cpp/on_disk/st_hash/static_hash_map.h
new file mode 100644
index 0000000000..5dc50abd39
--- /dev/null
+++ b/library/cpp/on_disk/st_hash/static_hash_map.h
@@ -0,0 +1,59 @@
+#pragma once
+
+#include "static_hash.h"
+
+#include <library/cpp/deprecated/mapped_file/mapped_file.h>
+
+#include <util/system/filemap.h>
+
+// Owns a TMappedFile and exposes its contents as a read-only hash of type SH.
+// `hsh` is null until Open() has accepted a mapping, and is null again after a failed Open().
+template <class SH>
+struct sthash_mapped_c {
+    typedef SH H;
+    typedef typename H::const_iterator const_iterator;
+    TMappedFile M;
+    H* hsh;
+    sthash_mapped_c()
+        : M()
+        , hsh(nullptr)
+    {
+    }
+    // Maps `fname` immediately; throws yexception when the file fails validation.
+    sthash_mapped_c(const char* fname, bool precharge)
+        : M()
+        , hsh(nullptr)
+    {
+        Open(fname, precharge);
+    }
+    // Maps `fname` and checks that it can hold a hash header and that the end offset
+    // stored in the header equals the file size. Throws yexception otherwise.
+    void Open(const char* fname, bool precharge) {
+        // Drop any previous pointer first: if mapping or validation fails below, the
+        // accessors must not hand out a pointer into a stale or rejected mapping.
+        hsh = nullptr;
+        M.init(fname);
+        if (precharge)
+            M.precharge(); // presumably pre-faults the mapped pages - see TMappedFile
+        H* const candidate = (H*)M.getData();
+        // The size check comes first so that the header is only read when the file is
+        // large enough to contain one.
+        if (M.getSize() < sizeof(H) || (ssize_t)M.getSize() != candidate->end().Data - (char*)candidate)
+            ythrow yexception() << "Could not map hash: " << fname << " is damaged";
+        hsh = candidate;
+    }
+    H* operator->() {
+        return hsh;
+    }
+    const H* operator->() const {
+        return hsh;
+    }
+    H* GetSthash() {
+        return hsh;
+    }
+    const H* GetSthash() const {
+        return hsh;
+    }
+};
+
+// Convenience wrapper: sthash_mapped_c specialised for sthash<Key, T, Hash>.
+template <class Key, class T, class Hash>
+struct sthash_mapped: public sthash_mapped_c<sthash<Key, T, Hash>> {
+    using H = sthash<Key, T, Hash>;
+
+    // Creates an unopened object; the base class keeps its hash pointer null.
+    sthash_mapped()
+        : sthash_mapped_c<H>()
+    {
+    }
+
+    // Maps and validates `fname` right away through the base-class constructor.
+    sthash_mapped(const char* fname, bool precharge)
+        : sthash_mapped_c<H>(fname, precharge)
+    {
+    }
+};
diff --git a/library/cpp/on_disk/st_hash/sthash_iterators.h b/library/cpp/on_disk/st_hash/sthash_iterators.h
new file mode 100644
index 0000000000..6a9ebdd6c3
--- /dev/null
+++ b/library/cpp/on_disk/st_hash/sthash_iterators.h
@@ -0,0 +1,334 @@
+#pragma once
+
+#include "save_stl.h"
+
+#include <util/system/align.h>
+
+/**
+ This file provides functionality for saving some relatively simple THashMap object
+ to disk in a form that can be mapped read-only (via mmap) at any address.
+ That saved object is accessed via pointer to sthash object (that must have
+ the same parameters as original THashMap object)
+
+ If either key or value are variable-sized (i.e. contain pointers), user must
+ write his own instantiation of TSthashIterator (read iterator for sthash) and
+ TSthashWriter (write iterator for THashMap).
+ An example for <const char *, B> pair is in here.
+**/
+
+// TEmptyValue and SizeOfEx are helpers for sthash_set
+struct TEmptyValue { // tag type that sthash_set uses as its mapped type
+ TEmptyValue() = default;
+};
+
+template <class T>
+inline size_t SizeOfEx() { // number of bytes a T occupies in a saved record: sizeof(T) unless specialised below
+ return sizeof(T);
+}
+
+template <>
+inline size_t SizeOfEx<TEmptyValue>() { // the empty tag takes no space in a saved record
+ return 0;
+}
+template <>
+inline size_t SizeOfEx<const TEmptyValue>() { // same for the const-qualified tag
+ return 0;
+}
+
+// Generic read iterator for fixed-size records: each record is a TKey immediately
+// followed by a TValue (a TEmptyValue occupies zero bytes, see SizeOfEx).
+template <class TKey, class TValue, class HashFcn, class EqualKey>
+struct TSthashIterator {
+    using TKeyType = const TKey;
+    using TValueType = const TValue;
+    using TKeyEqualType = EqualKey;
+    using THasherType = HashFcn;
+
+    // Address of the current record inside the image.
+    const char* Data;
+
+    TSthashIterator()
+        : Data(nullptr)
+    {
+    }
+    explicit TSthashIterator(const char* data)
+        : Data(data)
+    {
+    }
+
+    // Steps to the next record.
+    void operator++() {
+        Data = Data + GetLength();
+    }
+
+    bool operator==(const TSthashIterator& that) const {
+        return Data == that.Data;
+    }
+    bool operator!=(const TSthashIterator& that) const {
+        return !(Data == that.Data);
+    }
+
+    // The key sits at the start of the record.
+    TKey& Key() const {
+        TKey* const keyPtr = (TKey*)Data;
+        return *keyPtr;
+    }
+    // The value follows the key directly.
+    TValue& Value() {
+        TValue* const valuePtr = (TValue*)(Data + sizeof(TKey));
+        return *valuePtr;
+    }
+    const TValue& Value() const {
+        const TValue* const valuePtr = (const TValue*)(Data + sizeof(TKey));
+        return *valuePtr;
+    }
+
+    // Compares the stored key with `key` using the supplied comparator.
+    template <class AnotherKeyType>
+    bool KeyEquals(const EqualKey& eq, const AnotherKeyType& key) const {
+        return eq(*(TKey*)Data, key);
+    }
+
+    // Size of one record in bytes.
+    size_t GetLength() const {
+        const size_t keyBytes = sizeof(TKey);
+        return keyBytes + SizeOfEx<TValue>();
+    }
+};
+
+// Generic writer for fixed-size records: the raw bytes of the key, then the raw bytes of
+// the value (SizeOfEx<Value>() bytes, which is zero for TEmptyValue).
+template <class Key, class Value, typename size_type_o = ui64>
+struct TSthashWriter {
+    using TSizeType = size_type_o;
+
+    // Every record has the same size, so the argument is not inspected.
+    size_t GetRecordSize(const std::pair<const Key, const Value>&) const {
+        const size_t valueBytes = SizeOfEx<Value>();
+        return sizeof(Key) + valueBytes;
+    }
+
+    // Writes the key and then the value to `stream`; always returns 0.
+    int SaveRecord(IOutputStream* stream, const std::pair<const Key, const Value>& record) const {
+        const Key& key = record.first;
+        const Value& value = record.second;
+        stream->Write(&key, sizeof(Key));
+        stream->Write(&value, SizeOfEx<Value>());
+        return 0;
+    }
+};
+
+// Remember that this simplified implementation makes a copy of `key' in std::make_pair.
+// It can also waste some memory on undesired alignment.
+template <class Key, typename size_type_o = ui64>
+struct TSthashSetWriter: public TSthashWriter<Key, TEmptyValue, size_type_o> { // set writer: reuses the map writer with TEmptyValue as the value
+ typedef TSthashWriter<Key, TEmptyValue, size_type_o> MapWriter;
+ size_t GetRecordSize(const Key& key) const { // size of the record the map writer would produce for (key, TEmptyValue)
+ return MapWriter::GetRecordSize(std::make_pair(key, TEmptyValue()));
+ }
+ int SaveRecord(IOutputStream* stream, const Key& key) const { // writes (key, TEmptyValue) through the map writer and returns its result
+ return MapWriter::SaveRecord(stream, std::make_pair(key, TEmptyValue()));
+ }
+};
+
+/* we can't save something with pointers without additional tricks:
+ * the specializations below for pointer keys and/or pointer values are empty, so they
+ * provide none of the members of the primary templates (Data, Key(), GetRecordSize(),
+ * SaveRecord(), ...). More specialised `const char*` versions are defined further down.
+ */
+
+template <class A, class B, class HashFcn, class EqualKey>
+struct TSthashIterator<A*, B, HashFcn, EqualKey> {}; // iterator with a pointer key
+
+template <class A, class B, class HashFcn, class EqualKey>
+struct TSthashIterator<A, B*, HashFcn, EqualKey> {}; // iterator with a pointer value
+
+template <class A, class B, typename size_type_o>
+struct TSthashWriter<A*, B*, size_type_o> {}; // writer with pointer key and pointer value
+
+template <class A, class B, typename size_type_o>
+struct TSthashWriter<A*, B, size_type_o> {}; // writer with a pointer key
+
+template <class A, class B, typename size_type_o>
+struct TSthashWriter<A, B*, size_type_o> {}; // writer with a pointer value
+
+template <class T>
+inline size_t AlignForChrKey() { // record alignment, in bytes, used by the `const char*`-key iterator and writer for value type T
+ return 4; // TODO: change this (requires rebuilding a few existing files)
+}
+
+template <>
+inline size_t AlignForChrKey<TEmptyValue>() { // alignment of 1: records with an empty value get no padding
+ return 1;
+}
+
+template <>
+inline size_t AlignForChrKey<const TEmptyValue>() { // const-qualified tag: same as TEmptyValue
+ return AlignForChrKey<TEmptyValue>();
+}
+
+// !! note that for char*, physical placement of key and value is swapped
+template <class TValue, class HashFcn, class EqualKey>
+struct TSthashIterator<const char* const, TValue, HashFcn, EqualKey> { // record layout: [TValue bytes][NUL-terminated key][padding]
+ typedef const TValue TValueType;
+ typedef const char* TKeyType;
+ typedef EqualKey TKeyEqualType;
+ typedef HashFcn THasherType;
+
+ const char* Data; // address of the current record, i.e. of its value
+ TSthashIterator()
+ : Data(nullptr)
+ {
+ }
+ TSthashIterator(const char* data) // unlike the primary template, this constructor is not explicit
+ : Data(data)
+ {
+ }
+ void operator++() { // step to the next record
+ Data += GetLength();
+ }
+
+ bool operator!=(const TSthashIterator& that) const {
+ return Data != that.Data;
+ }
+ bool operator==(const TSthashIterator& that) const {
+ return Data == that.Data;
+ }
+ const char* Key() const { // the key string starts right after the value bytes
+ return Data + SizeOfEx<TValue>();
+ }
+ TValue& Value() { // the value sits at the start of the record
+ return *(TValue*)Data;
+ }
+ const TValue& Value() const {
+ return *(const TValue*)Data;
+ }
+
+ template <class K>
+ bool KeyEquals(const EqualKey& eq, const K& k) const { // the comparator receives the stored key as a const char*
+ return eq(Data + SizeOfEx<TValue>(), k);
+ }
+
+ size_t GetLength() const { // value bytes + key + terminating NUL, rounded up to AlignForChrKey<TValue>()
+ size_t length = strlen(Data + SizeOfEx<TValue>()) + 1 + SizeOfEx<TValue>();
+ length = AlignUp(length, AlignForChrKey<TValue>());
+ return length;
+ }
+};
+
+// Writer for records with a C-string key.
+// Record layout (the value precedes the key): [Value bytes][key chars]['\0'][padding],
+// padded so that the whole record is a multiple of AlignForChrKey<Value>() bytes.
+template <class Value, typename size_type_o>
+struct TSthashWriter<const char*, Value, size_type_o> {
+    typedef size_type_o TSizeType;
+
+    // Size in bytes of the record that SaveRecord writes, padding included.
+    size_t GetRecordSize(const std::pair<const char*, const Value>& record) const {
+        size_t length = strlen(record.first) + 1 + SizeOfEx<Value>();
+        length = AlignUp(length, AlignForChrKey<Value>());
+        return length;
+    }
+
+    // Writes the value, the NUL-terminated key and the padding; always returns 0.
+    int SaveRecord(IOutputStream* stream, const std::pair<const char*, const Value>& record) const {
+        const char* alignBuffer = "qqqq"; // filler bytes; the padding is always shorter than the alignment (at most 4)
+        stream->Write(&record.second, SizeOfEx<Value>());
+        const size_t keyLength = strlen(record.first) + 1;
+        stream->Write(record.first, keyLength);
+        // Pad relative to the whole record (value + key), which is how GetRecordSize and
+        // the matching iterator's GetLength compute the record size. Padding from the key
+        // length alone writes a different number of bytes whenever SizeOfEx<Value>() is
+        // not a multiple of the alignment, which breaks the offsets stored in the image.
+        const size_t padding = AlignUpSpace(keyLength + SizeOfEx<Value>(), AlignForChrKey<Value>());
+        if (padding)
+            stream->Write(alignBuffer, padding);
+        return 0;
+    }
+};
+
+// Read iterator for records with a fixed-size key and a C-string value.
+// Record layout: [TKey bytes][value chars]['\0'][padding], padded to a multiple of 4 bytes.
+template <class TKey, class HashFcn, class EqualKey>
+struct TSthashIterator<TKey, const char* const, HashFcn, EqualKey> {
+    typedef const TKey TKeyType;
+    typedef const char* TValueType;
+    typedef EqualKey TKeyEqualType;
+    typedef HashFcn THasherType;
+
+    // Address of the current record, i.e. of its key.
+    const char* Data;
+    TSthashIterator()
+        : Data(nullptr)
+    {
+    }
+    TSthashIterator(const char* data)
+        : Data(data)
+    {
+    }
+    // Steps to the next record.
+    void operator++() {
+        Data += GetLength();
+    }
+
+    bool operator!=(const TSthashIterator& that) const {
+        return Data != that.Data;
+    }
+    bool operator==(const TSthashIterator& that) const {
+        return Data == that.Data;
+    }
+    // const-qualified like Key() in every other TSthashIterator specialization, so the
+    // key can also be read through a const iterator.
+    TKey& Key() const {
+        return *(TKey*)Data;
+    }
+    // The value string starts right after the key bytes.
+    const char* Value() const {
+        return Data + sizeof(TKey);
+    }
+
+    template <class K>
+    bool KeyEquals(const EqualKey& eq, const K& k) const {
+        return eq(*(TKey*)Data, k);
+    }
+
+    // Key bytes + value + terminating NUL, rounded up to a multiple of 4.
+    size_t GetLength() const {
+        size_t length = strlen(Data + sizeof(TKey)) + 1 + sizeof(TKey);
+        length = AlignUp(length, (size_t)4);
+        return length;
+    }
+};
+
+// Writer for records with a fixed-size key and a C-string value.
+// Record layout: [Key bytes][value chars]['\0'][padding], padded so that the whole record
+// is a multiple of 4 bytes.
+template <class Key, typename size_type_o>
+struct TSthashWriter<Key, const char*, size_type_o> {
+    typedef size_type_o TSizeType;
+
+    // Size in bytes of the record that SaveRecord writes, padding included.
+    size_t GetRecordSize(const std::pair<const Key, const char*>& record) const {
+        size_t length = strlen(record.second) + 1 + sizeof(Key);
+        length = AlignUp(length, (size_t)4);
+        return length;
+    }
+
+    // Writes the key, the NUL-terminated value and the padding; always returns 0.
+    int SaveRecord(IOutputStream* stream, const std::pair<const Key, const char*>& record) const {
+        const char* alignBuffer = "qqqq"; // filler bytes; the padding is at most 3 bytes
+        stream->Write(&record.first, sizeof(Key));
+        const size_t valueLength = strlen(record.second) + 1;
+        stream->Write(record.second, valueLength);
+        // Pad relative to the whole record (key + value), which is how GetRecordSize and
+        // the matching iterator's GetLength compute the record size. Padding from the
+        // string length alone writes a different number of bytes whenever sizeof(Key) is
+        // not a multiple of 4, which breaks the offsets stored in the image.
+        const size_t padding = AlignUpSpace(valueLength + sizeof(Key), (size_t)4);
+        if (padding)
+            stream->Write(alignBuffer, padding);
+        return 0;
+    }
+};
+
+// Read iterator for records made of two C strings.
+// Record layout: [key chars]['\0'][value chars]['\0'], with no padding.
+template <class HashFcn, class EqualKey>
+struct TSthashIterator<const char* const, const char* const, HashFcn, EqualKey> {
+    using TKeyType = const char*;
+    using TValueType = const char*;
+    using TKeyEqualType = EqualKey;
+    using THasherType = HashFcn;
+
+    // Address of the current record, i.e. of its key string.
+    const char* Data;
+
+    TSthashIterator()
+        : Data(nullptr)
+    {
+    }
+    TSthashIterator(const char* data)
+        : Data(data)
+    {
+    }
+
+    // Steps to the next record.
+    void operator++() {
+        Data = Data + GetLength();
+    }
+
+    bool operator==(const TSthashIterator& that) const {
+        return Data == that.Data;
+    }
+    bool operator!=(const TSthashIterator& that) const {
+        return !(Data == that.Data);
+    }
+
+    const char* Key() const {
+        return Data;
+    }
+    // The value string starts right after the key's terminating NUL.
+    const char* Value() const {
+        const size_t keyBytes = strlen(Data) + 1;
+        return Data + keyBytes;
+    }
+
+    template <class K>
+    bool KeyEquals(const EqualKey& eq, const K& k) const {
+        return eq(Data, k);
+    }
+
+    // Both strings together with their terminating NULs.
+    size_t GetLength() const {
+        const size_t keyBytes = strlen(Data) + 1;
+        const size_t valueBytes = strlen(Data + keyBytes) + 1;
+        return keyBytes + valueBytes;
+    }
+};
+
+// Writer for records made of two C strings: the key and then the value, each written
+// together with its terminating NUL and without padding.
+template <typename size_type_o>
+struct TSthashWriter<const char*, const char*, size_type_o> {
+    using TSizeType = size_type_o;
+
+    // Both string lengths plus the two terminating NULs.
+    size_t GetRecordSize(const std::pair<const char*, const char*>& record) const {
+        const size_t keyChars = strlen(record.first);
+        const size_t valueChars = strlen(record.second);
+        return keyChars + valueChars + 2;
+    }
+
+    // Writes the key and then the value to `stream`; always returns 0.
+    int SaveRecord(IOutputStream* stream, const std::pair<const char*, const char*>& record) const {
+        const char* const key = record.first;
+        const char* const value = record.second;
+        stream->Write(key, strlen(key) + 1);
+        stream->Write(value, strlen(value) + 1);
+        return 0;
+    }
+};
diff --git a/library/cpp/on_disk/st_hash/ya.make b/library/cpp/on_disk/st_hash/ya.make
new file mode 100644
index 0000000000..8c6d05711c
--- /dev/null
+++ b/library/cpp/on_disk/st_hash/ya.make
@@ -0,0 +1,15 @@
+LIBRARY() # module for library/cpp/on_disk/st_hash
+
+SRCS( # fake.cpp only includes the four headers listed with it
+ fake.cpp
+ save_stl.h
+ static_hash.h
+ static_hash_map.h
+ sthash_iterators.h
+)
+
+PEERDIR( # static_hash_map.h includes library/cpp/deprecated/mapped_file/mapped_file.h
+ library/cpp/deprecated/mapped_file
+)
+
+END()
diff --git a/library/cpp/regex/CMakeLists.darwin-x86_64.txt b/library/cpp/regex/CMakeLists.darwin-x86_64.txt
index 6e2a4fabcd..877d40538b 100644
--- a/library/cpp/regex/CMakeLists.darwin-x86_64.txt
+++ b/library/cpp/regex/CMakeLists.darwin-x86_64.txt
@@ -6,6 +6,7 @@
# original buildsystem will not be accepted.
+add_subdirectory(glob)
add_subdirectory(hyperscan)
add_subdirectory(pcre)
add_subdirectory(pire)
diff --git a/library/cpp/regex/CMakeLists.linux-aarch64.txt b/library/cpp/regex/CMakeLists.linux-aarch64.txt
index 279390306b..84c257a819 100644
--- a/library/cpp/regex/CMakeLists.linux-aarch64.txt
+++ b/library/cpp/regex/CMakeLists.linux-aarch64.txt
@@ -6,5 +6,6 @@
# original buildsystem will not be accepted.
+add_subdirectory(glob)
add_subdirectory(pcre)
add_subdirectory(pire)
diff --git a/library/cpp/regex/CMakeLists.linux-x86_64.txt b/library/cpp/regex/CMakeLists.linux-x86_64.txt
index 6e2a4fabcd..877d40538b 100644
--- a/library/cpp/regex/CMakeLists.linux-x86_64.txt
+++ b/library/cpp/regex/CMakeLists.linux-x86_64.txt
@@ -6,6 +6,7 @@
# original buildsystem will not be accepted.
+add_subdirectory(glob)
add_subdirectory(hyperscan)
add_subdirectory(pcre)
add_subdirectory(pire)
diff --git a/library/cpp/regex/CMakeLists.windows-x86_64.txt b/library/cpp/regex/CMakeLists.windows-x86_64.txt
index 6e2a4fabcd..877d40538b 100644
--- a/library/cpp/regex/CMakeLists.windows-x86_64.txt
+++ b/library/cpp/regex/CMakeLists.windows-x86_64.txt
@@ -6,6 +6,7 @@
# original buildsystem will not be accepted.
+add_subdirectory(glob)
add_subdirectory(hyperscan)
add_subdirectory(pcre)
add_subdirectory(pire)
diff --git a/library/cpp/regex/glob/CMakeLists.darwin-x86_64.txt b/library/cpp/regex/glob/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..ca8383e355
--- /dev/null
+++ b/library/cpp/regex/glob/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-regex-glob) # no STATIC/SHARED keyword given, so the library type follows BUILD_SHARED_LIBS
+target_link_libraries(cpp-regex-glob PUBLIC # PUBLIC: these dependencies also propagate to targets linking this one
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-charset
+)
+target_sources(cpp-regex-glob PRIVATE # paths are rooted at the top-level source directory
+ ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob_iterator.cpp
+)
diff --git a/library/cpp/regex/glob/CMakeLists.linux-aarch64.txt b/library/cpp/regex/glob/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..3953937c6d
--- /dev/null
+++ b/library/cpp/regex/glob/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-regex-glob) # no STATIC/SHARED keyword given, so the library type follows BUILD_SHARED_LIBS
+target_link_libraries(cpp-regex-glob PUBLIC # PUBLIC: these dependencies also propagate to targets linking this one
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-charset
+)
+target_sources(cpp-regex-glob PRIVATE # paths are rooted at the top-level source directory
+ ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob_iterator.cpp
+)
diff --git a/library/cpp/regex/glob/CMakeLists.linux-x86_64.txt b/library/cpp/regex/glob/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..3953937c6d
--- /dev/null
+++ b/library/cpp/regex/glob/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-regex-glob) # no STATIC/SHARED keyword given, so the library type follows BUILD_SHARED_LIBS
+target_link_libraries(cpp-regex-glob PUBLIC # PUBLIC: these dependencies also propagate to targets linking this one
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-charset
+)
+target_sources(cpp-regex-glob PRIVATE # paths are rooted at the top-level source directory
+ ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob_iterator.cpp
+)
diff --git a/library/cpp/regex/glob/CMakeLists.txt b/library/cpp/regex/glob/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/library/cpp/regex/glob/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) # pick exactly one per-platform fragment; first match wins
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") # the only branch that does not test HAVE_CUDA
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) # Windows reports the processor as "AMD64" rather than "x86_64"
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif() # no else(): on any other platform/CUDA combination the cpp-regex-glob target is not defined
diff --git a/library/cpp/regex/glob/CMakeLists.windows-x86_64.txt b/library/cpp/regex/glob/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..ca8383e355
--- /dev/null
+++ b/library/cpp/regex/glob/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# Glob pattern-matching library (Windows x86_64 variant of the target).
+add_library(cpp-regex-glob)
+
+# Translation units compiled into the library (not propagated to consumers).
+target_sources(cpp-regex-glob PRIVATE
+  ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob_iterator.cpp
+)
+
+# Link dependencies; PUBLIC so that they are also applied to consumers.
+target_link_libraries(cpp-regex-glob PUBLIC
+  contrib-libs-cxxsupp
+  yutil
+  library-cpp-charset
+)
diff --git a/library/cpp/regex/glob/glob.cpp b/library/cpp/regex/glob/glob.cpp
new file mode 100644
index 0000000000..9da058122a
--- /dev/null
+++ b/library/cpp/regex/glob/glob.cpp
@@ -0,0 +1,921 @@
+#define FROM_IMPLEMENTATION
+#include "glob_compat.h"
+
+#if defined(USE_INTERNAL_GLOB)
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Guido van Rossum.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <library/cpp/charset/ci_string.h>
+#include <util/system/compat.h>
+#include <util/folder/dirut.h>
+
+/*
+ * glob(3) -- a superset of the one defined in POSIX 1003.2.
+ *
+ * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
+ *
+ * Optional extra services, controlled by flags not defined by POSIX:
+ *
+ * GLOB_QUOTE:
+ * Escaping convention: \ inhibits any special meaning the following
+ * character might have (except \ at end of string is retained).
+ * GLOB_MAGCHAR:
+ * Set in gl_flags if pattern contained a globbing character.
+ * GLOB_NOMAGIC:
+ * Same as GLOB_NOCHECK, but it will only append pattern if it did
+ * not contain any magic characters. [Used in csh style globbing]
+ * GLOB_ALTDIRFUNC:
+ * Use alternately specified directory access functions.
+ * GLOB_TILDE:
+ * expand ~user/foo to the /home/dir/of/user/foo
+ * GLOB_BRACE:
+ * expand {1,2}{a,b} to 1a 1b 2a 2b
+ * gl_matchc:
+ * Number of matches in the current invocation of glob.
+ */
+
+/*
+ * Some notes on multibyte character support:
+ * 1. Patterns with illegal byte sequences match nothing - even if
+ * GLOB_NOCHECK is specified.
+ * 2. Illegal byte sequences in filenames are handled by treating them as
+ * single-byte characters with a value of the first byte of the sequence
+ * cast to wchar_t.
+ * 3. State-dependent encodings are not currently supported.
+ */
+
+//#include <sys/param.h>
+#include <sys/stat.h>
+
+#include <ctype.h>
+//#include <dirent.h>
+#include <errno.h>
+#include <limits.h>
+//#include <pwd.h>
+//#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#if defined(_unix_)
+#include <unistd.h>
+#endif
+#include <wchar.h>
+
+#if !defined(_unix_)
+// silly replacement for compilation
+// Stand-ins for the POSIX names the BSD code below expects; they are compiled
+// only when _unix_ is not defined.
+using uint_fast64_t = ui64;
+using u_int = unsigned int;
+using u_char = unsigned char;
+// Default path limit used by glob() when GLOB_LIMIT is set but gl_matchc is 0.
+#define ARG_MAX 256
+#define S_ISDIR(x) ((x) & _S_IFDIR)
+// No symlink detection here: S_ISLNK is always false and lstat is plain stat.
+#define S_ISLNK(x) 0
+#define lstat stat
+// The identity/passwd lookups are stubs that report "nothing found", so
+// globtilde() can only expand a bare "~" through the HOME environment
+// variable and never expands "~user".
+inline bool issetugid() { return false; }
+inline char *getlogin() { return 0; }
+inline int getuid() { return 0; }
+struct passwd {
+ char *pw_dir;
+};
+inline passwd *getpwuid(int) { return 0; }
+inline passwd *getpwnam(char *) { return 0; }
+#endif
+
+// Collation stubs (defined on every platform): with __collate_load_error
+// fixed at 1, match() always compares bracket ranges by character value and
+// never calls __collate_range_cmp().
+#define __collate_load_error 1
+inline int __collate_range_cmp(int, int) { return 0; }
+#undef COMMA // was defined in stroka.h
+// end silly replacement
+
+//#include "collate.h"
+
+/* Literal characters that have a meaning in glob patterns and paths. */
+#define DOLLAR '$'
+#define DOT '.'
+#define EOS '\0'
+#define LBRACKET '['
+#define NOT '!'
+#define QUESTION '?'
+#define QUOTE '\\'
+#define RANGE '-'
+#define RBRACKET ']'
+#define SEP '/'
+#define STAR '*'
+#define TILDE '~'
+#define UNDERSCORE '_'
+#define LBRACE '{'
+#define RBRACE '}'
+#define SLASH '/'
+#define COMMA ','
+
+/*
+ * Internal pattern character (Char) and its tag bits.
+ *
+ * Normal build: Char is a 64-bit unsigned value. The low 32 bits (M_CHAR)
+ * hold the character, bit 38 (M_PROTECT) marks a character that glob()
+ * read after a backslash, bit 39 (M_QUOTE) marks a compiled meta
+ * character, and M_MASK selects the low 40 bits (character plus both tags).
+ *
+ * DEBUG build: the same layout squeezed into a plain char (7-bit character,
+ * bits 6 and 7 as tags).
+ */
+#ifndef DEBUG
+
+#define M_QUOTE 0x8000000000ULL
+#define M_PROTECT 0x4000000000ULL
+#define M_MASK 0xffffffffffULL
+#define M_CHAR 0x00ffffffffULL
+
+using Char = uint_fast64_t;
+
+#else
+
+#define M_QUOTE 0x80
+#define M_PROTECT 0x40
+#define M_MASK 0xff
+#define M_CHAR 0x7f
+
+using Char = char;
+
+#endif
+
+
+/* CHAR strips the tag bits; META tags a character as a compiled meta character. */
+#define CHAR(c) ((Char)((c)&M_CHAR))
+#define META(c) ((Char)((c)|M_QUOTE))
+/* Compiled meta characters written by glob0() and interpreted by match(). */
+#define M_ALL META('*')
+#define M_END META(']')
+#define M_NOT META('!')
+#define M_ONE META('?')
+#define M_RNG META('-')
+#define M_SET META('[')
+/* True when c carries the M_QUOTE tag, i.e. is one of the M_* values above. */
+#define ismeta(c) (((c)&M_QUOTE) != 0)
+
+
+static int compare(const void *, const void *);
+static int g_Ctoc(const Char *, char *, u_int);
+static int g_lstat(Char *, struct stat *, glob_t *);
+static DIR *g_opendir(Char *, glob_t *);
+static Char *g_strchr(Char *, wchar_t);
+#ifdef notdef
+static Char *g_strcat(Char *, const Char *);
+#endif
+static int glob0(const Char *, glob_t *, int *);
+static int glob1(Char *, glob_t *, int *);
+static int glob2(Char *, Char *, Char *, Char *, glob_t *, int *);
+static int glob3(Char *, Char *, Char *, Char *, Char *, glob_t *, int *);
+static int globextend(const Char *, glob_t *, int *);
+static const Char *
+ globtilde(const Char *, Char *, size_t, glob_t *);
+static int globexp1(const Char *, glob_t *, int *);
+static int globexp2(const Char *, const Char *, glob_t *, int *, int *);
+static int match(Char *, Char *, Char *);
+#ifdef DEBUG
+static void qprintf(const char *, Char *);
+#endif
+
+/*
+ * Public entry point.
+ *
+ * Converts the multibyte `pattern` into the internal Char form in a
+ * MAXPATHLEN-sized stack buffer, resets the result fields of `pglob`
+ * (unless GLOB_APPEND is given) and hands the converted pattern to
+ * globexp1() when GLOB_BRACE is set, or to glob0() otherwise.
+ *
+ * pattern  NUL-terminated multibyte pattern.
+ * flags    GLOB_* bits; stored in pglob->gl_flags with GLOB_MAGCHAR cleared.
+ * errfunc  stored in pglob->gl_errfunc; may be NULL.
+ * pglob    result structure, updated in place.
+ *
+ * Returns the result of globexp1()/glob0(), or GLOB_NOMATCH when the
+ * pattern contains an invalid or incomplete multibyte sequence.
+ */
+int
+glob(const char *pattern, int flags, int (*errfunc)(const char *, int), glob_t *pglob)
+{
+ const u_char *patnext;
+ int limit;
+ Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot;
+ mbstate_t mbs;
+ wchar_t wc;
+ size_t clen;
+
+ patnext = (u_char *) pattern;
+ if (!(flags & GLOB_APPEND)) {
+ pglob->gl_pathc = 0;
+ pglob->gl_pathv = NULL;
+ if (!(flags & GLOB_DOOFFS))
+ pglob->gl_offs = 0;
+ }
+ /*
+ * With GLOB_LIMIT the caller passes the limit in gl_matchc (read here
+ * before it is reset below); 0 selects ARG_MAX. Without GLOB_LIMIT the
+ * limit is 0, which globextend() treats as "no limit".
+ */
+ if (flags & GLOB_LIMIT) {
+ limit = pglob->gl_matchc;
+ if (limit == 0)
+ limit = ARG_MAX;
+ } else
+ limit = 0;
+ pglob->gl_flags = flags & ~GLOB_MAGCHAR;
+ pglob->gl_errfunc = errfunc;
+ pglob->gl_matchc = 0;
+
+ bufnext = patbuf;
+ bufend = bufnext + MAXPATHLEN - 1;
+ if (flags & GLOB_NOESCAPE) {
+ /* No quoting: decode the pattern one multibyte character at a time. */
+ memset(&mbs, 0, sizeof(mbs));
+ while (bufend - bufnext >= MB_CUR_MAX) {
+ clen = mbrtowc(&wc, (const char*)patnext, MB_LEN_MAX, &mbs);
+ if (clen == (size_t)-1 || clen == (size_t)-2)
+ return (GLOB_NOMATCH);
+ else if (clen == 0)
+ break;
+ *bufnext++ = wc;
+ patnext += clen;
+ }
+ } else {
+ /* Protect the quoted characters. */
+ memset(&mbs, 0, sizeof(mbs));
+ while (bufend - bufnext >= MB_CUR_MAX) {
+ if (*patnext == QUOTE) {
+ /* A backslash at the very end of the pattern is kept as a protected backslash. */
+ if (*++patnext == EOS) {
+ *bufnext++ = QUOTE | M_PROTECT;
+ continue;
+ }
+ /* Otherwise the backslash is dropped and the next character is tagged. */
+ prot = M_PROTECT;
+ } else
+ prot = 0;
+ clen = mbrtowc(&wc, (const char*)patnext, MB_LEN_MAX, &mbs);
+ if (clen == (size_t)-1 || clen == (size_t)-2)
+ return (GLOB_NOMATCH);
+ else if (clen == 0)
+ break;
+ *bufnext++ = wc | prot;
+ patnext += clen;
+ }
+ }
+ *bufnext = EOS;
+
+ if (flags & GLOB_BRACE)
+ return globexp1(patbuf, pglob, &limit);
+ else
+ return glob0(patbuf, pglob, &limit);
+}
+
+/*
+ * Brace expansion driver. Looks for a '{' in the pattern and lets
+ * globexp2() expand it; once no brace is left (or the pattern is the lone
+ * "{}"), the pattern goes to glob0() for ordinary globbing.
+ */
+static int
+globexp1(const Char *pattern, glob_t *pglob, int *limit)
+{
+    /* A pattern that is exactly "{}" is passed through, for find(1), like csh. */
+    const bool loneBraces =
+        pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS;
+
+    if (!loneBraces) {
+        int status;
+        const Char *brace = pattern;
+
+        for (;;) {
+            brace = (const Char *) g_strchr((Char *) brace, LBRACE);
+            if (brace == NULL)
+                break;
+            /* globexp2() reports the outcome of the expansion through status. */
+            if (globexp2(brace, pattern, pglob, &status, limit) == 0)
+                return status;
+        }
+    }
+
+    return glob0(pattern, pglob, limit);
+}
+
+
+/*
+ * Recursive brace globbing helper. Tries to expand a single brace.
+ * If it succeeds then it invokes globexp1 with the new pattern.
+ * If it fails then it tries to glob the rest of the pattern and returns.
+ *
+ * ptr      points at the '{' inside pattern that is to be expanded.
+ * pattern  the complete pattern.
+ * rv       receives the status meant for the caller of globexp1().
+ *
+ * The function itself always returns 0. Note that after all alternatives
+ * have been expanded *rv is set to 0, so the status of the individual
+ * globexp1() calls is not propagated.
+ */
+static int
+globexp2(const Char *ptr, const Char *pattern, glob_t *pglob, int *rv, int *limit)
+{
+ int i;
+ Char *lm, *ls;
+ const Char *pe, *pm, *pm1, *pl;
+ Char patbuf[MAXPATHLEN];
+
+ /* copy part up to the brace */
+ for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
+ continue;
+ *lm = EOS;
+ /* ls remembers where each alternative is to be written in patbuf */
+ ls = lm;
+
+ /* Find the balanced brace */
+ /* i counts nested '{'; on success pe ends up on the matching '}' */
+ for (i = 0, pe = ++ptr; *pe; pe++)
+ if (*pe == LBRACKET) {
+ /* Ignore everything between [] */
+ for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++)
+ continue;
+ if (*pe == EOS) {
+ /*
+ * We could not find a matching RBRACKET.
+ * Ignore and just look for RBRACE
+ */
+ pe = pm;
+ }
+ }
+ else if (*pe == LBRACE)
+ i++;
+ else if (*pe == RBRACE) {
+ if (i == 0)
+ break;
+ i--;
+ }
+
+ /* Non matching braces; just glob the pattern */
+ /* (patbuf holds only the text in front of the brace at this point) */
+ if (i != 0 || *pe == EOS) {
+ *rv = glob0(patbuf, pglob, limit);
+ return 0;
+ }
+
+ /*
+ * Walk the brace body. pl marks the start of the current alternative;
+ * a top-level ',' or the closing '}' ends it.
+ */
+ for (i = 0, pl = pm = ptr; pm <= pe; pm++)
+ switch (*pm) {
+ case LBRACKET:
+ /* Ignore everything between [] */
+ for (pm1 = pm++; *pm != RBRACKET && *pm != EOS; pm++)
+ continue;
+ if (*pm == EOS) {
+ /*
+ * We could not find a matching RBRACKET.
+ * Ignore and just look for RBRACE
+ */
+ pm = pm1;
+ }
+ break;
+
+ case LBRACE:
+ i++;
+ break;
+
+ case RBRACE:
+ if (i) {
+ i--;
+ break;
+ }
+ [[fallthrough]];
+ case COMMA:
+ /* a ',' inside a nested brace belongs to the inner expansion */
+ if (i && *pm == COMMA)
+ break;
+ else {
+ /* Append the current string */
+ for (lm = ls; (pl < pm); *lm++ = *pl++)
+ continue;
+ /*
+ * Append the rest of the pattern after the
+ * closing brace
+ */
+ for (pl = pe + 1; (*lm++ = *pl++) != EOS;)
+ continue;
+
+ /* Expand the current pattern */
+#ifdef DEBUG
+ qprintf("globexp2:", patbuf);
+#endif
+ *rv = globexp1(patbuf, pglob, limit);
+
+ /* move after the comma, to the next string */
+ pl = pm + 1;
+ }
+ break;
+
+ default:
+ break;
+ }
+ *rv = 0;
+ return 0;
+}
+
+
+
+/*
+ * expand tilde from the passwd file.
+ *
+ * Returns `pattern` unchanged when it does not start with '~', when
+ * GLOB_TILDE is not set, or when no home directory can be determined.
+ * Otherwise the expanded pattern is written to `patbuf` (capacity
+ * `patbuf_len` Chars) and `patbuf` is returned.
+ */
+static const Char *
+globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob)
+{
+ struct passwd *pwd;
+ char *h;
+ const Char *p;
+ Char *b, *eb;
+
+ if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE))
+ return pattern;
+
+ /*
+ * Copy up to the end of the string or /
+ */
+ /*
+ * The user name is stored as narrow chars at the start of patbuf, i.e.
+ * the Char buffer is temporarily reused as a char buffer.
+ */
+ eb = &patbuf[patbuf_len - 1];
+ for (p = pattern + 1, h = (char *) patbuf;
+ h < (char *)eb && *p && *p != SLASH; *h++ = (char)*p++)
+ continue;
+
+ *h = EOS;
+
+ if (((char *) patbuf)[0] == EOS) {
+ /*
+ * handle a plain ~ or ~/ by expanding $HOME first (iff
+ * we're not running setuid or setgid) and then trying
+ * the password file
+ */
+ if (issetugid() != 0 ||
+ (h = ::getenv("HOME")) == NULL) {
+ if (((h = getlogin()) != NULL &&
+ (pwd = getpwnam(h)) != NULL) ||
+ (pwd = getpwuid(getuid())) != NULL)
+ h = pwd->pw_dir;
+ else
+ return pattern;
+ }
+ }
+ else {
+ /*
+ * Expand a ~user
+ */
+ if ((pwd = getpwnam((char*) patbuf)) == NULL)
+ return pattern;
+ else
+ h = pwd->pw_dir;
+ }
+
+ /* Copy the home directory */
+ /* (this overwrites the user name stored in patbuf above) */
+ for (b = patbuf; b < eb && *h; *b++ = *h++)
+ continue;
+
+ /* Append the rest of the pattern */
+ /* p still points at the '/' or the end of the original pattern */
+ while (b < eb && (*b++ = *p++) != EOS)
+ continue;
+ *b = EOS;
+
+ return patbuf;
+}
+
+
+/*
+ * The main glob() routine: compiles the pattern (optionally processing
+ * quotes), calls glob1() to do the real pattern matching, and finally
+ * sorts the list (unless unsorted operation is requested).  Returns 0
+ * if things went well, nonzero if errors occurred.
+ *
+ * Every pattern character is examined as a full Char. Narrowing it to
+ * char would drop the M_PROTECT tag (so a backslash-escaped '*', '?' or
+ * '[' would still be treated as magic) and would truncate wide
+ * characters (a character whose low byte is 0 would end the pattern, and
+ * bytes >= 0x80 would be sign-extended into values that never match).
+ * Protected characters therefore fall through to the default case, where
+ * CHAR() strips the tag and stores them as literals.
+ *
+ * pattern  Char pattern produced by glob()/globexp2().
+ * pglob    result structure; GLOB_MAGCHAR is set in gl_flags when the
+ *          pattern contains an unquoted '*', '?' or a complete [...] set.
+ * limit    path limit forwarded to glob1()/globextend().
+ *
+ * Returns 0 on success, GLOB_NOMATCH when nothing matched and neither
+ * GLOB_NOCHECK nor GLOB_NOMAGIC applies, or the error from glob1() /
+ * globextend().
+ */
+static int
+glob0(const Char *pattern, glob_t *pglob, int *limit)
+{
+    const Char *qpatnext;
+    Char c;
+    int err, oldpathc;
+    Char *bufnext, patbuf[MAXPATHLEN];
+
+    qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
+    oldpathc = pglob->gl_pathc;
+    bufnext = patbuf;
+
+    /* We don't need to check for buffer overflow any more. */
+    while ((c = *qpatnext++) != EOS) {
+        switch (c) {
+        case LBRACKET:
+            c = *qpatnext;
+            if (c == NOT)
+                ++qpatnext;
+            /* A '[' without a closing ']' is an ordinary character. */
+            if (*qpatnext == EOS ||
+                g_strchr((Char *) qpatnext+1, RBRACKET) == NULL) {
+                *bufnext++ = LBRACKET;
+                if (c == NOT)
+                    --qpatnext;
+                break;
+            }
+            *bufnext++ = M_SET;
+            if (c == NOT)
+                *bufnext++ = M_NOT;
+            c = *qpatnext++;
+            do {
+                *bufnext++ = CHAR(c);
+                /* "a-b" becomes <a> M_RNG <b>; a '-' right before ']' is literal. */
+                if (*qpatnext == RANGE &&
+                    (c = qpatnext[1]) != RBRACKET) {
+                    *bufnext++ = M_RNG;
+                    *bufnext++ = CHAR(c);
+                    qpatnext += 2;
+                }
+            } while ((c = *qpatnext++) != RBRACKET);
+            pglob->gl_flags |= GLOB_MAGCHAR;
+            *bufnext++ = M_END;
+            break;
+        case QUESTION:
+            pglob->gl_flags |= GLOB_MAGCHAR;
+            *bufnext++ = M_ONE;
+            break;
+        case STAR:
+            pglob->gl_flags |= GLOB_MAGCHAR;
+            /* collapse adjacent stars to one,
+             * to avoid exponential behavior
+             */
+            if (bufnext == patbuf || bufnext[-1] != M_ALL)
+                *bufnext++ = M_ALL;
+            break;
+        default:
+            /* Literal character; CHAR() removes a possible M_PROTECT tag. */
+            *bufnext++ = CHAR(c);
+            break;
+        }
+    }
+    *bufnext = EOS;
+#ifdef DEBUG
+    qprintf("glob0:", patbuf);
+#endif
+
+    if ((err = glob1(patbuf, pglob, limit)) != 0)
+        return(err);
+
+    /*
+     * If there was no match we are going to append the pattern
+     * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
+     * and the pattern did not contain any magic characters
+     * GLOB_NOMAGIC is there just for compatibility with csh.
+     */
+    if (pglob->gl_pathc == oldpathc) {
+        if (((pglob->gl_flags & GLOB_NOCHECK) ||
+            ((pglob->gl_flags & GLOB_NOMAGIC) &&
+            !(pglob->gl_flags & GLOB_MAGCHAR))))
+            return(globextend(pattern, pglob, limit));
+        else
+            return(GLOB_NOMATCH);
+    }
+    /* Only the paths added by this call are sorted. */
+    if (!(pglob->gl_flags & GLOB_NOSORT))
+        qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
+            pglob->gl_pathc - oldpathc, sizeof(char *), compare);
+    return(0);
+}
+
+/*
+ * qsort() comparator for the gl_pathv entries: both arguments point at a
+ * char* element, and the strings are ordered with strcmp().
+ */
+static int
+compare(const void *p, const void *q)
+{
+    const char *lhs = *(char **)p;
+    const char *rhs = *(char **)q;
+
+    return strcmp(lhs, rhs);
+}
+
+/*
+ * Starts the directory walk for a compiled pattern: provides the path
+ * buffer that glob2()/glob3() build candidate paths in. An empty pattern
+ * matches nothing and returns 0.
+ */
+static int
+glob1(Char *pattern, glob_t *pglob, int *limit)
+{
+    Char scratch[MAXPATHLEN];
+    Char *const lastSlot = scratch + MAXPATHLEN - 1;
+
+    /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
+    if (*pattern != EOS)
+        return(glob2(scratch, scratch, lastSlot, pattern, pglob, limit));
+
+    return(0);
+}
+
+/*
+ * The functions glob2 and glob3 are mutually recursive; there is one level
+ * of recursion for each segment in the pattern that contains one or more
+ * meta characters.
+ *
+ * pathbuf       start of the path assembled so far.
+ * pathend       position in pathbuf where the next segment is written.
+ * pathend_last  last usable slot of pathbuf.
+ * pattern       remaining compiled pattern.
+ *
+ * Returns 0 when the path does not exist, GLOB_ABORTED when the path
+ * buffer would overflow, and otherwise the result of globextend() or
+ * glob3().
+ */
+static int
+glob2(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern, glob_t *pglob, int *limit)
+{
+ struct stat sb;
+ Char *p, *q;
+ int anymeta;
+
+ /*
+ * Loop over pattern segments until end of pattern or until
+ * segment with meta character found.
+ */
+ for (anymeta = 0;;) {
+ if (*pattern == EOS) { /* End of pattern? */
+ *pathend = EOS;
+ /* a path that cannot be lstat'ed is silently not a match */
+ if (g_lstat(pathbuf, &sb, pglob))
+ return(0);
+
+ /* GLOB_MARK: append '/' to directories that do not already end in one */
+ if (((pglob->gl_flags & GLOB_MARK) &&
+ pathend[-1] != SEP) && (S_ISDIR(sb.st_mode))) {
+ if (pathend + 1 > pathend_last)
+ return (GLOB_ABORTED);
+ *pathend++ = SEP;
+ *pathend = EOS;
+ }
+ ++pglob->gl_matchc;
+ return(globextend(pathbuf, pglob, limit));
+ }
+
+ /* Find end of next segment, copy tentatively to pathend. */
+ q = pathend;
+ p = pattern;
+ while (*p != EOS && *p != SEP) {
+ if (ismeta(*p))
+ anymeta = 1;
+ if (q + 1 > pathend_last)
+ return (GLOB_ABORTED);
+ *q++ = *p++;
+ }
+
+ if (!anymeta) { /* No expansion, do next segment. */
+ pathend = q;
+ pattern = p;
+ /* copy the separator(s) that follow the literal segment */
+ while (*pattern == SEP) {
+ if (pathend + 1 > pathend_last)
+ return (GLOB_ABORTED);
+ *pathend++ = *pattern++;
+ }
+ } else /* Need expansion, recurse. */
+ return(glob3(pathbuf, pathend, pathend_last, pattern, p,
+ pglob, limit));
+ }
+ /* NOTREACHED */
+}
+
+/*
+ * Expands one pattern segment that contains meta characters: opens the
+ * directory assembled so far in pathbuf, matches every entry name against
+ * [pattern, restpattern) and recurses into glob2() with the rest of the
+ * pattern for each entry that matches.
+ *
+ * pathbuf       start of the path assembled so far.
+ * pathend       position in pathbuf where entry names are written.
+ * pathend_last  last usable slot of pathbuf.
+ * pattern       start of the segment to match.
+ * restpattern   end of that segment / start of the remaining pattern.
+ *
+ * Entries whose name does not fit into the remaining path buffer are
+ * skipped: the copy would otherwise be left without a terminator and
+ * match()/glob2() would work on an unterminated buffer.
+ *
+ * Returns 0 when the directory cannot be opened and the error is ignored,
+ * GLOB_ABORTED when it is not ignored or the buffer is exhausted, and
+ * otherwise the first non-zero result of glob2() (0 if there is none).
+ */
+static int
+glob3(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern, Char *restpattern, glob_t *pglob, int *limit)
+{
+    struct dirent *dp;
+    DIR *dirp;
+    int err;
+    char buf[MAXPATHLEN];
+
+    /*
+     * The readdirfunc declaration can't be prototyped, because it is
+     * assigned, below, to two functions which are prototyped in glob.h
+     * and dirent.h as taking pointers to differently typed opaque
+     * structures.
+     */
+    typedef struct dirent *(*readdirfunc_t)(void*);
+    readdirfunc_t readdirfunc;
+
+    if (pathend > pathend_last)
+        return (GLOB_ABORTED);
+    *pathend = EOS;
+    errno = 0;
+
+    if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
+        /* TODO: don't call for ENOENT or ENOTDIR? */
+        if (pglob->gl_errfunc) {
+            if (g_Ctoc(pathbuf, buf, sizeof(buf)))
+                return (GLOB_ABORTED);
+            if (pglob->gl_errfunc(buf, errno) ||
+                pglob->gl_flags & GLOB_ERR)
+                return (GLOB_ABORTED);
+        }
+        return(0);
+    }
+
+    err = 0;
+
+    /* Search directory for matching names. */
+    if (pglob->gl_flags & GLOB_ALTDIRFUNC)
+        readdirfunc = pglob->gl_readdir;
+    else
+        readdirfunc = (readdirfunc_t)readdir;
+    while ((dp = (*readdirfunc)(dirp))) {
+        u_char *sc;
+        Char *dc;
+        wchar_t wc;
+        size_t clen;
+        mbstate_t mbs;
+        /* stays true unless the terminator of the name was copied */
+        bool too_long = true;
+
+        /* Initial DOT must be matched literally. */
+        if (dp->d_name[0] == DOT && *pattern != DOT)
+            continue;
+        memset(&mbs, 0, sizeof(mbs));
+        dc = pathend;
+        sc = (u_char *) dp->d_name;
+        while (dc < pathend_last) {
+            clen = mbrtowc(&wc, (const char*)sc, MB_LEN_MAX, &mbs);
+            if (clen == (size_t)-1 || clen == (size_t)-2) {
+                /* illegal sequence: take the byte as a character of its own */
+                wc = *sc;
+                clen = 1;
+                memset(&mbs, 0, sizeof(mbs));
+            }
+            if ((*dc++ = wc) == EOS) {
+                too_long = false;
+                break;
+            }
+            sc += clen;
+        }
+        /* A truncated name is unterminated and cannot be a valid path. */
+        if (too_long || !match(pathend, pattern, restpattern)) {
+            *pathend = EOS;
+            continue;
+        }
+        /* --dc steps back onto the terminator, where the next segment starts */
+        err = glob2(pathbuf, --dc, pathend_last, restpattern,
+            pglob, limit);
+        if (err)
+            break;
+    }
+
+    if (pglob->gl_flags & GLOB_ALTDIRFUNC)
+        (*pglob->gl_closedir)(dirp);
+    else
+        closedir(dirp);
+    return(err);
+}
+
+
+/*
+ * Extend the gl_pathv member of a glob_t structure to accommodate a new item,
+ * add the new item, and update gl_pathc.
+ *
+ * This assumes the BSD realloc, which only copies the block when its size
+ * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
+ * behavior.
+ *
+ * Return 0 if new item added, error code if memory couldn't be allocated.
+ * GLOB_NOSPACE is also returned (with errno set to 0) when the path limit
+ * is exceeded, and when the path cannot be converted to a multibyte string.
+ *
+ * Invariant of the glob_t structure:
+ *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
+ *	gl_pathv points to (gl_offs + gl_pathc + 1) items.
+ *
+ * When growing the vector fails, the paths collected so far are released
+ * together with the vector and gl_pathc is reset, so that nothing leaks
+ * and the invariant above keeps holding. On every other exit the vector
+ * is left NULL-terminated.
+ */
+static int
+globextend(const Char *path, glob_t *pglob, int *limit)
+{
+    char **pathv;
+    int i;
+    size_t newsize, len;
+    char *copy;
+    const Char *p;
+
+    /* *limit == 0 means "no limit" */
+    if (*limit && pglob->gl_pathc > *limit) {
+        errno = 0;
+        return (GLOB_NOSPACE);
+    }
+
+    /* room for the reserved slots, the old paths, the new path and the NULL */
+    newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
+    pathv = pglob->gl_pathv ?
+            (char**)realloc((char *)pglob->gl_pathv, newsize) :
+            (char**)malloc(newsize);
+    if (pathv == NULL) {
+        if (pglob->gl_pathv) {
+            /*
+             * A failed realloc leaves the old block untouched; free the
+             * strings it owns before freeing the block itself.
+             */
+            for (i = 0; i < pglob->gl_pathc; ++i)
+                free(pglob->gl_pathv[pglob->gl_offs + i]);
+            free(pglob->gl_pathv);
+            pglob->gl_pathv = NULL;
+            pglob->gl_pathc = 0;
+        }
+        return(GLOB_NOSPACE);
+    }
+
+    if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
+        /* first time around -- clear initial gl_offs items */
+        pathv += pglob->gl_offs;
+        for (i = pglob->gl_offs; --i >= 0; )
+            *--pathv = NULL;
+    }
+    pglob->gl_pathv = pathv;
+
+    /* p ends up one past the terminator, so p - path counts it as well */
+    for (p = path; *p++;)
+        continue;
+    len = MB_CUR_MAX * (size_t)(p - path); /* XXX overallocation */
+    copy = (char*)malloc(len);
+    if (copy != NULL && g_Ctoc(path, copy, (u_int)len)) {
+        /* conversion failed: report it like an allocation failure */
+        free(copy);
+        copy = NULL;
+    }
+    if (copy != NULL)
+        pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
+    /* keep the vector NULL-terminated on success and on failure alike */
+    pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
+    return(copy == NULL ? GLOB_NOSPACE : 0);
+}
+
+/*
+ * pattern matching function for filenames. Each occurrence of the *
+ * pattern causes a recursion level.
+ *
+ * name    EOS-terminated candidate name.
+ * pat     start of the compiled pattern segment.
+ * patend  end (exclusive) of the compiled pattern segment.
+ *
+ * Returns 1 when the whole name matches the segment, 0 otherwise.
+ */
+static int
+match(Char *name, Char *pat, Char *patend)
+{
+ int ok, negate_range;
+ Char c, k;
+
+ while (pat < patend) {
+ c = *pat++;
+ switch (c & M_MASK) {
+ case M_ALL:
+ /* a trailing '*' matches whatever is left of the name */
+ if (pat == patend)
+ return(1);
+ /* otherwise try the rest of the pattern at every position of the name */
+ do
+ if (match(name, pat, patend))
+ return(1);
+ while (*name++ != EOS);
+ return(0);
+ case M_ONE:
+ /* '?' consumes exactly one character */
+ if (*name++ == EOS)
+ return(0);
+ break;
+ case M_SET:
+ /* k is the name character tested against the set */
+ ok = 0;
+ if ((k = *name++) == EOS)
+ return(0);
+ /* negate_range is 1 when the set starts with M_NOT, else 0 */
+ if ((negate_range = ((*pat & M_MASK) == M_NOT)) != EOS)
+ ++pat;
+ while (((c = *pat++) & M_MASK) != M_END)
+ if ((*pat & M_MASK) == M_RNG) {
+ /* __collate_load_error is defined as 1 in this file, so the first arm (plain value comparison) is the one evaluated */
+ if (__collate_load_error ?
+ CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1]) :
+ __collate_range_cmp((int)CHAR(c), (int)CHAR(k)) <= 0
+ && __collate_range_cmp((int)CHAR(k), (int)CHAR(pat[1])) <= 0
+ )
+ ok = 1;
+ /* skip M_RNG and the upper bound of the range */
+ pat += 2;
+ } else if (c == k)
+ ok = 1;
+ /* fail when a plain set did not hit or a negated set did */
+ if (ok == negate_range)
+ return(0);
+ break;
+ default:
+ /* literal character */
+ if (*name++ != c)
+ return(0);
+ break;
+ }
+ }
+ /* the pattern is used up; the name must be used up too */
+ return(*name == EOS);
+}
+
+/*
+ * Releases everything glob() stored in pglob: the gl_pathc path strings
+ * that follow the gl_offs reserved slots, then the vector itself, which
+ * is reset to NULL afterwards. Does nothing when gl_pathv is NULL.
+ */
+void
+globfree(glob_t *pglob)
+{
+    if (pglob->gl_pathv == NULL)
+        return;
+
+    char **slot = pglob->gl_pathv + pglob->gl_offs;
+    int remaining = pglob->gl_pathc;
+
+    while (remaining-- != 0) {
+        if (*slot != NULL)
+            free(*slot);
+        ++slot;
+    }
+
+    free(pglob->gl_pathv);
+    pglob->gl_pathv = NULL;
+}
+
+/*
+ * Opens the directory named by the Char string str; an empty string
+ * stands for ".". Uses pglob->gl_opendir when GLOB_ALTDIRFUNC is set and
+ * opendir() otherwise. Returns NULL when the name cannot be converted.
+ */
+static DIR *
+g_opendir(Char *str, glob_t *pglob)
+{
+    char dirName[MAXPATHLEN];
+
+    if (*str == EOS) {
+        strcpy(dirName, ".");
+    } else if (g_Ctoc(str, dirName, sizeof(dirName)) != 0) {
+        return (NULL);
+    }
+
+    const bool useAlternate = (pglob->gl_flags & GLOB_ALTDIRFUNC) != 0;
+    if (!useAlternate)
+        return(opendir(dirName));
+
+    return (DIR*)((*pglob->gl_opendir)(dirName));
+}
+
+/*
+ * lstat() for a Char path. Uses pglob->gl_lstat when GLOB_ALTDIRFUNC is
+ * set and lstat() otherwise. Returns -1 with errno set to ENAMETOOLONG
+ * when the path cannot be converted into the local buffer.
+ */
+static int
+g_lstat(Char *fn, struct stat *sb, glob_t *pglob)
+{
+    char narrowPath[MAXPATHLEN];
+
+    if (g_Ctoc(fn, narrowPath, sizeof(narrowPath)) != 0) {
+        errno = ENAMETOOLONG;
+        return (-1);
+    }
+
+    if ((pglob->gl_flags & GLOB_ALTDIRFUNC) == 0)
+        return(lstat(narrowPath, sb));
+
+    return((*pglob->gl_lstat)(narrowPath, sb));
+}
+
+/*
+ * strchr() for Char strings: returns a pointer to the first element equal
+ * to ch (the terminator itself is found when ch is 0), or NULL when the
+ * string ends first.
+ */
+static Char *
+g_strchr(Char *str, wchar_t ch)
+{
+    for (;; ++str) {
+        if (*str == ch)
+            return (str);
+        if (*str == EOS)
+            return (NULL);
+    }
+}
+
+/*
+ * Converts the Char string str into a multibyte string in buf, which has
+ * room for len bytes. Returns 0 once the terminator has been converted,
+ * and 1 when a character cannot be converted or fewer than MB_CUR_MAX
+ * bytes are left in buf.
+ */
+static int
+g_Ctoc(const Char *str, char *buf, u_int len)
+{
+    mbstate_t state;
+
+    memset(&state, 0, sizeof(state));
+    for (; (int)len >= MB_CUR_MAX; ++str) {
+        const size_t written = wcrtomb(buf, (wchar_t)*str, &state);
+
+        if (written == (size_t)-1)
+            return (1);
+        if (*str == L'\0')
+            return (0);
+        buf += written;
+        len -= (u_int)written;
+    }
+    return (1);
+}
+
+#ifdef DEBUG
+/*
+ * Debug dump of a Char pattern: the label, then one row with the
+ * characters, one row marking protected characters with '"' and one row
+ * marking meta characters with '_'.
+ */
+static void
+qprintf(const char *str, Char *s)
+{
+    Char *cursor;
+
+    (void)printf("%s:\n", str);
+
+    cursor = s;
+    while (*cursor) {
+        (void)printf("%c", CHAR(*cursor));
+        ++cursor;
+    }
+    (void)printf("\n");
+
+    cursor = s;
+    while (*cursor) {
+        (void)printf("%c", *cursor & M_PROTECT ? '"' : ' ');
+        ++cursor;
+    }
+    (void)printf("\n");
+
+    cursor = s;
+    while (*cursor) {
+        (void)printf("%c", ismeta(*cursor) ? '_' : ' ');
+        ++cursor;
+    }
+    (void)printf("\n");
+}
+#endif
+#endif
diff --git a/library/cpp/regex/glob/glob_compat.h b/library/cpp/regex/glob/glob_compat.h
new file mode 100644
index 0000000000..0dc518d51b
--- /dev/null
+++ b/library/cpp/regex/glob/glob_compat.h
@@ -0,0 +1,73 @@
+#pragma once
+
+#include <util/system/defaults.h>
+
+#if defined(_MSC_VER) || defined(_bionic_)
+#define USE_INTERNAL_GLOB
+#endif
+
+#if !defined(USE_INTERNAL_GLOB)
+#include <glob.h>
+#else
+
+struct stat;
+/*
+ * Result and configuration block of glob() for the bundled implementation
+ * (this definition is only compiled when USE_INTERNAL_GLOB is defined).
+ */
+typedef struct {
+ int gl_pathc; /* Count of total paths so far. */
+ int gl_matchc; /* Count of paths matching pattern. */
+ int gl_offs; /* Reserved at beginning of gl_pathv. */
+ int gl_flags; /* Copy of flags parameter to glob. */
+ char** gl_pathv; /* List of paths matching pattern. */
+ /* Copy of errfunc parameter to glob. */
+ int (*gl_errfunc)(const char*, int);
+
+ /*
+ * Alternate filesystem access methods for glob; replacement
+ * versions of closedir(3), readdir(3), opendir(3), stat(2)
+ * and lstat(2).
+ */
+ /* The directory handles are passed around as void*. */
+ void (*gl_closedir)(void*);
+ struct dirent* (*gl_readdir)(void*);
+ void* (*gl_opendir)(const char*);
+ int (*gl_lstat)(const char*, struct stat*);
+ int (*gl_stat)(const char*, struct stat*);
+} glob_t;
+
+//#if __POSIX_VISIBLE >= 199209
+/* Believed to have been introduced in 1003.2-1992 */
+#define GLOB_APPEND 0x0001 /* Append to output from previous call. */
+#define GLOB_DOOFFS 0x0002 /* Use gl_offs. */
+#define GLOB_ERR 0x0004 /* Return on error. */
+#define GLOB_MARK 0x0008 /* Append / to matching directories. */
+#define GLOB_NOCHECK 0x0010 /* Return pattern itself if nothing matches. */
+#define GLOB_NOSORT 0x0020 /* Don't sort. */
+#define GLOB_NOESCAPE 0x2000 /* Disable backslash escaping. */
+
+/* Error values returned by glob(3) */
+#define GLOB_NOSPACE (-1) /* Malloc call failed. */
+#define GLOB_ABORTED (-2) /* Unignored error. */
+#define GLOB_NOMATCH (-3) /* No match and GLOB_NOCHECK was not set. */
+#define GLOB_NOSYS (-4) /* Obsolete: source compatibility only. */
+//#endif /* __POSIX_VISIBLE >= 199209 */
+
+//#if __BSD_VISIBLE
+#define GLOB_ALTDIRFUNC 0x0040 /* Use alternately specified directory funcs. */
+#define GLOB_BRACE 0x0080 /* Expand braces ala csh. */
+#define GLOB_MAGCHAR 0x0100 /* Pattern had globbing characters. */
+#define GLOB_NOMAGIC 0x0200 /* GLOB_NOCHECK without magic chars (csh). */
+#define GLOB_QUOTE 0x0400 /* Quote special chars with \. */
+#define GLOB_TILDE 0x0800 /* Expand tilde names from the passwd file. */
+#define GLOB_LIMIT 0x1000 /* limit number of returned paths */
+
+/* source compatibility, these are the old names */
+#define GLOB_MAXPATH GLOB_LIMIT
+#define GLOB_ABEND GLOB_ABORTED
+//#endif /* __BSD_VISIBLE */
+
+int glob(const char*, int, int (*)(const char*, int), glob_t*);
+void globfree(glob_t*);
+
+#endif /* !defined(USE_INTERNAL_GLOB) */
+
+#if !defined(FROM_IMPLEMENTATION)
+#undef USE_INTERNAL_GLOB
+#endif
diff --git a/library/cpp/regex/glob/glob_iterator.cpp b/library/cpp/regex/glob/glob_iterator.cpp
new file mode 100644
index 0000000000..746b49f397
--- /dev/null
+++ b/library/cpp/regex/glob/glob_iterator.cpp
@@ -0,0 +1 @@
+#include "glob_iterator.h"
diff --git a/library/cpp/regex/glob/glob_iterator.h b/library/cpp/regex/glob/glob_iterator.h
new file mode 100644
index 0000000000..e25481e594
--- /dev/null
+++ b/library/cpp/regex/glob/glob_iterator.h
@@ -0,0 +1,36 @@
+#pragma once
+
+#include "glob_compat.h"
+
+#include <util/generic/noncopyable.h>
+#include <util/generic/string.h>
+#include <util/generic/yexception.h>
+
+// Owns the result of one glob() call and exposes the matched paths as a
+// [begin(), end()) range of C strings. The result is released with
+// globfree() in the destructor.
+class TGlobPaths : TNonCopyable {
+public:
+    // Runs glob() on `pattern` with no flags and no error callback.
+    // "No match" is not an error and yields an empty range; any other
+    // non-zero result makes Y_ENSURE throw.
+    TGlobPaths(const char* pattern) {
+        const int result = glob(pattern, 0, nullptr, &Impl);
+        const bool succeeded = result == 0 || result == GLOB_NOMATCH;
+        if (!succeeded) {
+            // The destructor does not run for an object whose constructor
+            // throws, so release whatever glob() collected before failing.
+            globfree(&Impl);
+        }
+        Y_ENSURE(succeeded, "glob failed");
+    }
+
+    // Same as above for a TString pattern.
+    TGlobPaths(const TString& pattern)
+        : TGlobPaths(pattern.data())
+    {
+    }
+
+    ~TGlobPaths() {
+        globfree(&Impl);
+    }
+
+    // First matched path.
+    const char** begin() {
+        return const_cast<const char**>(Impl.gl_pathv);
+    }
+
+    // One past the last matched path.
+    const char** end() {
+        return const_cast<const char**>(Impl.gl_pathv + Impl.gl_pathc);
+    }
+
+private:
+    // Zero-initialised so that every field has a defined value even if
+    // glob() returns before filling it in.
+    glob_t Impl{};
+};
diff --git a/library/cpp/regex/glob/ya.make b/library/cpp/regex/glob/ya.make
new file mode 100644
index 0000000000..9379742d99
--- /dev/null
+++ b/library/cpp/regex/glob/ya.make
@@ -0,0 +1,12 @@
+# ya.make description of the glob library; the generated CMakeLists.*.txt
+# files in this directory list the same two sources.
+LIBRARY()
+
+# Source files of the library.
+SRCS(
+ glob.cpp
+ glob_iterator.cpp
+)
+
+# Dependency on library/cpp/charset (glob.cpp includes
+# <library/cpp/charset/ci_string.h>).
+PEERDIR(
+ library/cpp/charset
+)
+
+END()
diff --git a/library/cpp/reverse_geocoder/CMakeLists.txt b/library/cpp/reverse_geocoder/CMakeLists.txt
new file mode 100644
index 0000000000..621e95fdb2
--- /dev/null
+++ b/library/cpp/reverse_geocoder/CMakeLists.txt
@@ -0,0 +1,11 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Sub-projects of library/cpp/reverse_geocoder, added in this order.
+add_subdirectory(core)
+add_subdirectory(library)
+add_subdirectory(proto)
diff --git a/library/cpp/reverse_geocoder/core/CMakeLists.darwin-x86_64.txt b/library/cpp/reverse_geocoder/core/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..17f6e79c96
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,35 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# Core of the reverse geocoder (Darwin x86_64 variant of the target).
+add_library(cpp-reverse_geocoder-core)
+
+# Translation units compiled into the library: the top-level sources first,
+# then the geo_data/ sources.
+target_sources(cpp-reverse_geocoder-core PRIVATE
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/area_box.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/bbox.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/common.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/edge.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/kv.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/location.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/part.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/point.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/polygon.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/region.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/debug.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/def.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/map.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp
+)
+
+# Link dependencies; PUBLIC so that they are also applied to consumers.
+target_link_libraries(cpp-reverse_geocoder-core PUBLIC
+  contrib-libs-cxxsupp
+  yutil
+  cpp-reverse_geocoder-library
+  cpp-reverse_geocoder-proto
+  cpp-digest-crc32c
+)
diff --git a/library/cpp/reverse_geocoder/core/CMakeLists.linux-aarch64.txt b/library/cpp/reverse_geocoder/core/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..02361a0a1a
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,36 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# Core of the reverse geocoder (Linux aarch64 variant of the target).
+add_library(cpp-reverse_geocoder-core)
+
+# Translation units compiled into the library: the top-level sources first,
+# then the geo_data/ sources.
+target_sources(cpp-reverse_geocoder-core PRIVATE
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/area_box.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/bbox.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/common.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/edge.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/kv.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/location.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/part.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/point.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/polygon.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/region.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/debug.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/def.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/map.cpp
+  ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp
+)
+
+# Link dependencies; PUBLIC so that they are also applied to consumers.
+target_link_libraries(cpp-reverse_geocoder-core PUBLIC
+  contrib-libs-linux-headers
+  contrib-libs-cxxsupp
+  yutil
+  cpp-reverse_geocoder-library
+  cpp-reverse_geocoder-proto
+  cpp-digest-crc32c
+)
diff --git a/library/cpp/reverse_geocoder/core/CMakeLists.linux-x86_64.txt b/library/cpp/reverse_geocoder/core/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..02361a0a1a
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,36 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# Library target for the reverse geocoder core on linux-x86_64; its sources
+# are attached below.
+add_library(cpp-reverse_geocoder-core)
+# PUBLIC: these libraries are linked into this target and are also propagated
+# to every target that links cpp-reverse_geocoder-core.
+target_link_libraries(cpp-reverse_geocoder-core PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ cpp-reverse_geocoder-library
+ cpp-reverse_geocoder-proto
+ cpp-digest-crc32c
+)
+# PRIVATE: the sources are compiled as part of this target only. Paths are
+# rooted at CMAKE_SOURCE_DIR, i.e. the top of the whole build tree.
+target_sources(cpp-reverse_geocoder-core PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/area_box.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/bbox.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/common.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/edge.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/kv.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/location.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/part.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/point.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/polygon.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/region.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/debug.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/def.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/map.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp
+)
diff --git a/library/cpp/reverse_geocoder/core/CMakeLists.txt b/library/cpp/reverse_geocoder/core/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Pull in the platform-specific target description. The first branch whose
+# system name / processor condition (and, where listed, "NOT HAVE_CUDA") holds
+# is included. There is no else() branch: on any other configuration nothing
+# is included and this file defines no target.
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/library/cpp/reverse_geocoder/core/CMakeLists.windows-x86_64.txt b/library/cpp/reverse_geocoder/core/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..17f6e79c96
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,35 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# Library target for the reverse geocoder core on windows-x86_64; its sources
+# are attached below.
+add_library(cpp-reverse_geocoder-core)
+# PUBLIC: these libraries are linked into this target and are also propagated
+# to every target that links cpp-reverse_geocoder-core.
+target_link_libraries(cpp-reverse_geocoder-core PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ cpp-reverse_geocoder-library
+ cpp-reverse_geocoder-proto
+ cpp-digest-crc32c
+)
+# PRIVATE: the sources are compiled as part of this target only. Paths are
+# rooted at CMAKE_SOURCE_DIR, i.e. the top of the whole build tree.
+target_sources(cpp-reverse_geocoder-core PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/area_box.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/bbox.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/common.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/edge.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/kv.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/location.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/part.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/point.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/polygon.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/region.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/debug.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/def.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/map.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp
+)
diff --git a/library/cpp/reverse_geocoder/core/area_box.cpp b/library/cpp/reverse_geocoder/core/area_box.cpp
new file mode 100644
index 0000000000..67038fe4f8
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/area_box.cpp
@@ -0,0 +1,9 @@
+#include "area_box.h"
+
+using namespace NReverseGeocoder;
+
+// Computes the index of the area box that covers `point`. The point is shifted
+// so that (NAreaBox::LowerX, NAreaBox::LowerY) becomes the origin, each axis is
+// divided by the box size, and the two box coordinates are flattened with X as
+// the major axis (NAreaBox::NumberY boxes per X step). The result is not
+// range-checked here.
+TRef NReverseGeocoder::LookupAreaBox(const TPoint& point) {
+    const TCoordinate offsetX = point.X - NAreaBox::LowerX;
+    const TCoordinate offsetY = point.Y - NAreaBox::LowerY;
+
+    const TRef column = offsetX / NAreaBox::DeltaX;
+    const TRef row = offsetY / NAreaBox::DeltaY;
+
+    return column * NAreaBox::NumberY + row;
+}
diff --git a/library/cpp/reverse_geocoder/core/area_box.h b/library/cpp/reverse_geocoder/core/area_box.h
new file mode 100644
index 0000000000..1077a65fef
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/area_box.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#include "common.h"
+#include "point.h"
+
+namespace NReverseGeocoder {
+ // Parameters of the regular grid of area boxes. All values are fixed-point
+ // coordinates produced by ToCoordinate (see common.h).
+ namespace NAreaBox {
+ // Bounds of the covered area along X and Y.
+ const TCoordinate LowerX = ToCoordinate(-180.0);
+ const TCoordinate UpperX = ToCoordinate(180.0);
+ const TCoordinate LowerY = ToCoordinate(-90.0);
+ const TCoordinate UpperY = ToCoordinate(90.0);
+ // Size of a single box along X and Y.
+ const TCoordinate DeltaX = ToCoordinate(0.1);
+ const TCoordinate DeltaY = ToCoordinate(0.1);
+ // Number of boxes along each axis and in total. These are counts, although
+ // they are stored as TCoordinate.
+ const TCoordinate NumberX = (UpperX - LowerX) / DeltaX;
+ const TCoordinate NumberY = (UpperY - LowerY) / DeltaY;
+ const TCoordinate Number = NumberX * NumberY;
+
+ }
+
+ // Area of geo territory. Variable PolygonRefsOffset refers to the polygons lying inside this
+ // area. Geo map is divided into equal bounding boxes from (NAreaBox::LowerX, NAreaBox::LowerY)
+ // to (NAreaBox::UpperX, NAreaBox::UpperY) with DeltaX and DeltaY sizes. The filling logic lives
+ // in the generator.
+ struct Y_PACKED TAreaBox {
+ TNumber PolygonRefsOffset;
+ // NOTE(review): presumably the number of polygon refs starting at PolygonRefsOffset - confirm
+ // against the generator.
+ TNumber PolygonRefsNumber;
+ };
+
+ // The struct is stored in serialized geo data, so its size must not change.
+ static_assert(sizeof(TAreaBox) == 8, "NReverseGeocoder::TAreaBox size mismatch");
+
+ // Determines which area box of the geo data the point falls into.
+ TRef LookupAreaBox(const TPoint& point);
+
+}
diff --git a/library/cpp/reverse_geocoder/core/bbox.cpp b/library/cpp/reverse_geocoder/core/bbox.cpp
new file mode 100644
index 0000000000..aa4258ac22
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/bbox.cpp
@@ -0,0 +1 @@
+#include "bbox.h"
diff --git a/library/cpp/reverse_geocoder/core/bbox.h b/library/cpp/reverse_geocoder/core/bbox.h
new file mode 100644
index 0000000000..e8b6e00aa3
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/bbox.h
@@ -0,0 +1,66 @@
+#pragma once
+
+#include "common.h"
+#include "point.h"
+
+#include <util/generic/utility.h>
+
+namespace NReverseGeocoder {
+    // Axis-aligned bounding box in fixed-point coordinates. (X1, Y1) is the
+    // lower corner and (X2, Y2) is the upper corner; both corners are inclusive
+    // in Contains() and HasIntersection().
+    struct Y_PACKED TBoundingBox {
+        TCoordinate X1;
+        TCoordinate Y1;
+        TCoordinate X2;
+        TCoordinate Y2;
+
+        // Box with all four coordinates set to zero.
+        TBoundingBox()
+            : TBoundingBox(0, 0, 0, 0)
+        {
+        }
+
+        // Box with explicitly given corners.
+        TBoundingBox(TCoordinate x1, TCoordinate y1, TCoordinate x2, TCoordinate y2)
+            : X1(x1)
+            , Y1(y1)
+            , X2(x2)
+            , Y2(y2)
+        {
+        }
+
+        // Smallest box covering the first `number` points of `points`. With
+        // zero points the box stays in the inverted state produced by Init().
+        TBoundingBox(const TPoint* points, TNumber number) {
+            Init();
+            const TPoint* const end = points + number;
+            for (const TPoint* it = points; it != end; ++it) {
+                Relax(*it);
+            }
+        }
+
+        // Resets the box to an inverted state (lower corner at the maximum,
+        // upper corner at the minimum) so that the first Relax() snaps it to
+        // that point.
+        void Init() {
+            X1 = ToCoordinate(180.0);
+            Y1 = ToCoordinate(90.0);
+            X2 = ToCoordinate(-180.0);
+            Y2 = ToCoordinate(-90.0);
+        }
+
+        // Grows the box just enough to cover `p`.
+        void Relax(const TPoint& p) {
+            if (p.X < X1) {
+                X1 = p.X;
+            }
+            if (p.Y < Y1) {
+                Y1 = p.Y;
+            }
+            if (p.X > X2) {
+                X2 = p.X;
+            }
+            if (p.Y > Y2) {
+                Y2 = p.Y;
+            }
+        }
+
+        // True when the two boxes share at least one point (touching counts).
+        bool HasIntersection(const TBoundingBox& r) const {
+            return X1 <= r.X2 && X2 >= r.X1 && Y1 <= r.Y2 && Y2 >= r.Y1;
+        }
+
+        // True when `p` lies inside the box or on its border.
+        bool Contains(const TPoint& p) const {
+            return X1 <= p.X && p.X <= X2 && Y1 <= p.Y && p.Y <= Y2;
+        }
+    };
+
+ static_assert(sizeof(TBoundingBox) == 16, "NReverseGeocoder::TBoundingBox size mismatch");
+
+}
diff --git a/library/cpp/reverse_geocoder/core/common.cpp b/library/cpp/reverse_geocoder/core/common.cpp
new file mode 100644
index 0000000000..67c02a20a0
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/common.cpp
@@ -0,0 +1 @@
+#include "common.h"
diff --git a/library/cpp/reverse_geocoder/core/common.h b/library/cpp/reverse_geocoder/core/common.h
new file mode 100644
index 0000000000..090407ffd9
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/common.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include <util/system/compiler.h>
+#include <util/system/types.h>
+
+namespace NReverseGeocoder {
+    // Fixed-point coordinate: degrees scaled by 1e6 (see ToCoordinate).
+    using TCoordinate = i32;
+    using TGeoId = ui64;
+    // Element count of an array.
+    using TNumber = ui32;
+    // Index into one of the geo data arrays.
+    using TRef = ui32;
+    // Result type of coordinate products (see TPoint::Cross).
+    using TSquare = i64;
+    using TVersion = ui64;
+
+    // NOTE(review): presumably the mean Earth radius in meters - confirm.
+    const double EARTH_RADIUS = 6371000.0;
+
+    // Converts a floating point value to its fixed-point representation.
+    // The scaled value is truncated toward zero, not rounded.
+    inline TCoordinate ToCoordinate(double x) {
+        return static_cast<TCoordinate>(x * 1e6);
+    }
+
+    // Converts a fixed-point coordinate back to a floating point value.
+    inline double ToDouble(TCoordinate x) {
+        return static_cast<double>(x) / 1e6;
+    }
+
+}
diff --git a/library/cpp/reverse_geocoder/core/edge.cpp b/library/cpp/reverse_geocoder/core/edge.cpp
new file mode 100644
index 0000000000..86c6ab8535
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/edge.cpp
@@ -0,0 +1 @@
+#include "edge.h"
diff --git a/library/cpp/reverse_geocoder/core/edge.h b/library/cpp/reverse_geocoder/core/edge.h
new file mode 100644
index 0000000000..9d20928857
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/edge.h
@@ -0,0 +1,101 @@
+#pragma once
+
+#include "common.h"
+#include "point.h"
+
+#include <util/generic/utility.h>
+#include <util/system/yassert.h>
+
+namespace NReverseGeocoder {
+ // TEdge represents a polygon edge. Beg and End are indices of the edge's begin and end points
+ // in the points array of the geographical data.
+ struct Y_PACKED TEdge {
+ TRef Beg;
+ TRef End;
+
+ TEdge()
+ : Beg(0)
+ , End(0)
+ {
+ }
+
+ TEdge(const TRef& a, const TRef& b)
+ : Beg(a)
+ , End(b)
+ {
+ }
+
+ // Edges are compared by their point indices, not by point coordinates.
+ bool operator==(const TEdge& e) const {
+ return Beg == e.Beg && End == e.End;
+ }
+
+ bool operator!=(const TEdge& e) const {
+ return Beg != e.Beg || End != e.End;
+ }
+
+ // Lexicographic order by (Beg, End).
+ bool operator<(const TEdge& e) const {
+ return Beg < e.Beg || (Beg == e.Beg && End < e.End);
+ }
+
+ // Checks that the current edge lies lower than the other edge. Both edges must share a common
+ // range of X values, otherwise the behavior is undefined. Both edges are expected to be
+ // oriented so that Beg.X <= End.X (asserted below).
+ bool Lower(const TEdge& e, const TPoint* points) const {
+ if (*this == e)
+ return false;
+
+ const TPoint& a1 = points[Beg];
+ const TPoint& a2 = points[End];
+ const TPoint& b1 = points[e.Beg];
+ const TPoint& b2 = points[e.End];
+
+ Y_ASSERT(a1.X <= a2.X && b1.X <= b2.X);
+
+ // Each branch picks an endpoint of one edge whose X lies within the X range of the other
+ // edge and uses the sign of a cross product to tell on which side of that other edge the
+ // endpoint lies. Shared endpoints are handled first because they give a zero cross product
+ // in the generic branches.
+ if (a1 == b1) {
+ return (a2 - a1).Cross(b2 - a1) > 0;
+ } else if (a2 == b2) {
+ return (a1 - b1).Cross(b2 - b1) > 0;
+ } else if (b1.X >= a1.X && b1.X <= a2.X) {
+ return (a2 - a1).Cross(b1 - a1) > 0;
+ } else if (b2.X >= a1.X && b2.X <= a2.X) {
+ return (a2 - a1).Cross(b2 - a1) > 0;
+ } else if (a1.X >= b1.X && a1.X <= b2.X) {
+ return (a1 - b1).Cross(b2 - b1) > 0;
+ } else if (a2.X >= b1.X && a2.X <= b2.X) {
+ return (a2 - b1).Cross(b2 - b1) > 0;
+ } else {
+ // The X ranges of the edges do not overlap.
+ return false;
+ }
+ }
+
+ // Checks that the current edge lies lower than the given point. The edge and the point must
+ // share a common X value, otherwise the behavior is undefined. Returns false when the point
+ // lies on the edge.
+ bool Lower(const TPoint& p, const TPoint* points) const {
+ if (Contains(p, points))
+ return false;
+
+ TPoint a = points[Beg];
+ TPoint b = points[End];
+
+ // Orient the edge left to right so that the sign of the cross product is meaningful.
+ if (a.X > b.X)
+ DoSwap(a, b);
+
+ return (b - a).Cross(p - a) > 0;
+ }
+
+ // Checks that the point lies on the edge: its X is within the X range of the edge and it is
+ // collinear with the edge endpoints.
+ bool Contains(const TPoint& p, const TPoint* points) const {
+ TPoint a = points[Beg];
+ TPoint b = points[End];
+
+ if (a.X > b.X)
+ DoSwap(a, b);
+
+ if (p.X < a.X || p.X > b.X)
+ return false;
+
+ return (b - a).Cross(p - a) == 0;
+ }
+ };
+
+ static_assert(sizeof(TEdge) == 8, "NReverseGeocoder::TEdge size mismatch");
+
+}
diff --git a/library/cpp/reverse_geocoder/core/geo_data/debug.cpp b/library/cpp/reverse_geocoder/core/geo_data/debug.cpp
new file mode 100644
index 0000000000..4db0534b22
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/geo_data/debug.cpp
@@ -0,0 +1,74 @@
+#include "debug.h"
+
+#include <library/cpp/reverse_geocoder/library/log.h>
+#include <library/cpp/reverse_geocoder/library/memory.h>
+
+using namespace NReverseGeocoder;
+using namespace NGeoData;
+
+// Computes the number of bytes taken by the geo data payload: sizeof of every scalar field plus,
+// for every array, sizeof(TNumber) for its length and sizeof(element) * element count.
+size_t NReverseGeocoder::NGeoData::Space(const IGeoData& g) {
+ size_t space = 0;
+
+// The two macros below define what GEO_BASE_DEF_GEO_DATA expands to for scalar fields and for
+// arrays; they are undefined again right after the expansion.
+#define GEO_BASE_DEF_VAR(TVar, Var) \
+ space += sizeof(TVar);
+
+#define GEO_BASE_DEF_ARR(TArr, Arr) \
+ space += sizeof(TNumber) + sizeof(TArr) * g.Arr##Number();
+
+ GEO_BASE_DEF_GEO_DATA
+
+#undef GEO_BASE_DEF_VAR
+#undef GEO_BASE_DEF_ARR
+
+ return space;
+}
+
+// Size of an array of `number` elements of type TArr, expressed in units of MB
+// (the MB constant is defined outside this file).
+template <typename TArr>
+static float ArraySpace(TNumber number) {
+    const double bytes = number * sizeof(TArr) * 1.0;
+    return bytes / MB;
+}
+
+// Writes a human-readable summary of the geo data to `out`: the total size reported by Space()
+// divided by GB, then one line per scalar field with its value and one line per array with its
+// element count, element size and ArraySpace() result.
+void NReverseGeocoder::NGeoData::Show(IOutputStream& out, const IGeoData& g) {
+ out << "GeoData = " << NGeoData::Space(g) * 1.0 / GB << " GB" << '\n';
+
+// Per-field printers used by the GEO_BASE_DEF_GEO_DATA expansion below.
+#define GEO_BASE_DEF_VAR(TVar, Var) \
+ out << " GeoData." << #Var << " = " << (unsigned long long)g.Var() << '\n';
+
+#define GEO_BASE_DEF_ARR(TArr, Arr) \
+ out << " GeoData." << #Arr << " = " \
+ << g.Arr##Number() << " x " << sizeof(TArr) << " = " \
+ << ArraySpace<TArr>(g.Arr##Number()) << " MB" \
+ << '\n';
+
+ GEO_BASE_DEF_GEO_DATA
+
+#undef GEO_BASE_DEF_VAR
+#undef GEO_BASE_DEF_ARR
+}
+
+// Bytewise comparison of two arrays of `count` elements each.
+// Empty arrays compare equal without touching the pointers: memcmp must not be
+// given null pointers even for a zero length, and an empty geo data array may
+// be represented by nullptr (TGeoDataMap resets its array pointers to nullptr).
+template <typename TArr>
+static bool Equals(const TArr* a, const TArr* b, size_t count) {
+    if (count == 0) {
+        return true;
+    }
+    return memcmp(a, b, sizeof(TArr) * count) == 0;
+}
+
+// Compares two geo data objects field by field. Scalar fields are compared with !=; for arrays
+// the element counts are compared first and then the contents bytewise. On the first mismatch
+// the field name is logged via LogError and false is returned.
+bool NReverseGeocoder::NGeoData::Equals(const IGeoData& a, const IGeoData& b) {
+#define GEO_BASE_DEF_VAR(TVar, Var) \
+ if (a.Var() != b.Var()) { \
+ LogError(#Var " not equal"); \
+ return false; \
+ }
+
+// Reuses GEO_BASE_DEF_VAR for the element count, so the counts are known to be equal by the
+// time the contents are compared. `::Equals` is the file-local template helper.
+#define GEO_BASE_DEF_ARR(TArr, Arr) \
+ GEO_BASE_DEF_VAR(TNumber, Arr##Number); \
+ if (!::Equals(a.Arr(), b.Arr(), a.Arr##Number())) { \
+ LogError(#Arr " not equal"); \
+ return false; \
+ }
+
+ GEO_BASE_DEF_GEO_DATA
+
+#undef GEO_BASE_DEF_VAR
+#undef GEO_BASE_DEF_ARR
+
+ return true;
+}
diff --git a/library/cpp/reverse_geocoder/core/geo_data/debug.h b/library/cpp/reverse_geocoder/core/geo_data/debug.h
new file mode 100644
index 0000000000..e7a4d9029c
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/geo_data/debug.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include "geo_data.h"
+
+#include <util/stream/output.h>
+
+namespace NReverseGeocoder {
+ namespace NGeoData {
+ size_t Space(const IGeoData& g);
+
+ void Show(IOutputStream& out, const IGeoData& g);
+
+ bool Equals(const IGeoData& a, const IGeoData& b);
+
+ }
+}
diff --git a/library/cpp/reverse_geocoder/core/geo_data/def.cpp b/library/cpp/reverse_geocoder/core/geo_data/def.cpp
new file mode 100644
index 0000000000..bb9f760d73
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/geo_data/def.cpp
@@ -0,0 +1 @@
+#include "def.h"
diff --git a/library/cpp/reverse_geocoder/core/geo_data/def.h b/library/cpp/reverse_geocoder/core/geo_data/def.h
new file mode 100644
index 0000000000..d3e331d873
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/geo_data/def.h
@@ -0,0 +1,35 @@
+#pragma once
+
+#include <library/cpp/reverse_geocoder/core/area_box.h>
+#include <library/cpp/reverse_geocoder/core/common.h>
+#include <library/cpp/reverse_geocoder/core/edge.h>
+#include <library/cpp/reverse_geocoder/core/kv.h>
+#include <library/cpp/reverse_geocoder/core/part.h>
+#include <library/cpp/reverse_geocoder/core/point.h>
+#include <library/cpp/reverse_geocoder/core/polygon.h>
+#include <library/cpp/reverse_geocoder/core/region.h>
+
+namespace NReverseGeocoder {
+ // Known versions of the geo data format.
+ const TVersion GEO_DATA_VERSION_0 = 0;
+ const TVersion GEO_DATA_VERSION_1 = 1;
+
+ // Version expected by this code.
+ const TVersion GEO_DATA_CURRENT_VERSION = GEO_DATA_VERSION_1;
+
+// Geographical data definition. This X-macro lists every scalar field (GEO_BASE_DEF_VAR) and
+// every array (GEO_BASE_DEF_ARR) of the geo data; it is used for reflection in map/unmap, show,
+// etc. A user defines GEO_BASE_DEF_VAR(Type, Name) and GEO_BASE_DEF_ARR(ElementType, Name),
+// expands GEO_BASE_DEF_GEO_DATA and undefines both again. Every entry ends with ';'.
+// The last entry ends with a backslash, so the comment line after it is joined to the macro.
+#define GEO_BASE_DEF_GEO_DATA \
+ GEO_BASE_DEF_VAR(TVersion, Version); \
+ GEO_BASE_DEF_ARR(TPoint, Points); \
+ GEO_BASE_DEF_ARR(TEdge, Edges); \
+ GEO_BASE_DEF_ARR(TRef, EdgeRefs); \
+ GEO_BASE_DEF_ARR(TPart, Parts); \
+ GEO_BASE_DEF_ARR(TPolygon, Polygons); \
+ GEO_BASE_DEF_ARR(TRef, PolygonRefs); \
+ GEO_BASE_DEF_ARR(TAreaBox, Boxes); \
+ GEO_BASE_DEF_ARR(char, Blobs); \
+ GEO_BASE_DEF_ARR(TKv, Kvs); \
+ GEO_BASE_DEF_ARR(TRegion, Regions); \
+ GEO_BASE_DEF_ARR(TRawPolygon, RawPolygons); \
+ GEO_BASE_DEF_ARR(TRef, RawEdgeRefs); \
+ // #define GEO_BASE_DEF_GEO_DATA
+
+}
diff --git a/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp b/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp
new file mode 100644
index 0000000000..be3310b291
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp
@@ -0,0 +1 @@
+#include "geo_data.h"
diff --git a/library/cpp/reverse_geocoder/core/geo_data/geo_data.h b/library/cpp/reverse_geocoder/core/geo_data/geo_data.h
new file mode 100644
index 0000000000..7cb76bcddc
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/geo_data/geo_data.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include "def.h"
+
+namespace NReverseGeocoder {
+ // Read-only interface to geographical data. The accessors are generated from
+ // GEO_BASE_DEF_GEO_DATA: every scalar field gets `const T& Name() const`, every array gets
+ // `const T* Name() const` plus `TNumber NameNumber() const` for its element count.
+ class IGeoData {
+#define GEO_BASE_DEF_VAR(TVar, Var) \
+ virtual const TVar& Var() const = 0;
+
+#define GEO_BASE_DEF_ARR(TArr, Arr) \
+ virtual const TArr* Arr() const = 0; \
+ virtual TNumber Arr##Number() const = 0;
+
+ public:
+ // Expands to the pure virtual accessors described above.
+ GEO_BASE_DEF_GEO_DATA
+
+#undef GEO_BASE_DEF_VAR
+#undef GEO_BASE_DEF_ARR
+
+ virtual ~IGeoData() {
+ }
+ };
+
+}
diff --git a/library/cpp/reverse_geocoder/core/geo_data/map.cpp b/library/cpp/reverse_geocoder/core/geo_data/map.cpp
new file mode 100644
index 0000000000..312f7d7cb0
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/geo_data/map.cpp
@@ -0,0 +1,203 @@
+#include "map.h"
+
+#include <library/cpp/reverse_geocoder/library/log.h>
+#include <library/cpp/reverse_geocoder/library/system.h>
+#include <library/cpp/reverse_geocoder/proto/geo_data.pb.h>
+
+#include <library/cpp/digest/crc32c/crc32c.h>
+
+#include <util/generic/algorithm.h>
+#include <util/generic/buffer.h>
+#include <util/generic/vector.h>
+#include <util/network/address.h>
+#include <util/system/filemap.h>
+#include <util/system/unaligned_mem.h>
+
+using namespace NReverseGeocoder;
+
+// Maximum number of leading elements of each array covered by its stored CRC32C: both Serialize
+// and Remap checksum only min(element count, CRC_SIZE) elements.
+static const TNumber CRC_SIZE = 3;
+
+// Resets every mapped field to its empty state: scalar fields are value-initialized, array
+// pointers become nullptr and array counts become 0. Data_ and Size_ are not touched.
+void NReverseGeocoder::TGeoDataMap::Init() {
+#define GEO_BASE_DEF_VAR(TVar, Var) \
+ Var##_ = TVar();
+
+#define GEO_BASE_DEF_ARR(TArr, Arr) \
+ Arr##_ = nullptr; \
+ Arr##Number_ = 0;
+
+ GEO_BASE_DEF_GEO_DATA
+
+#undef GEO_BASE_DEF_VAR
+#undef GEO_BASE_DEF_ARR
+}
+
+// Creates an empty map: no backing memory, and all fields reset by Init().
+NReverseGeocoder::TGeoDataMap::TGeoDataMap()
+    : Data_(nullptr)
+    , Size_(0)
+{
+    Init();
+}
+
+// Checks that the arrays described by the header do not overlap each other.
+// Every non-empty array contributes a [begin, end) segment built from its
+// offset, element count and element size; the segments are sorted and each
+// neighbouring pair is compared. Returns false on the first overlap.
+static bool CheckMemoryConsistency(const NProto::TGeoData& g) {
+    using TSegment = std::pair<intptr_t, intptr_t>;
+    TVector<TSegment> segments;
+
+#define GEO_BASE_DEF_VAR(TVar, Var) \
+    // undef
+
+#define GEO_BASE_DEF_ARR(TArr, Arr) \
+    if (g.Get##Arr##Number() > 0) { \
+        intptr_t const beg = g.Get##Arr(); \
+        intptr_t const end = g.Get##Arr() + g.Get##Arr##Number() * sizeof(TArr); \
+        segments.emplace_back(beg, end); \
+    }
+
+    GEO_BASE_DEF_GEO_DATA
+
+#undef GEO_BASE_DEF_VAR
+#undef GEO_BASE_DEF_ARR
+
+    Sort(segments.begin(), segments.end());
+
+    // After sorting, an overlap shows up as a segment that ends after the
+    // next one begins.
+    for (size_t next = 1; next < segments.size(); ++next) {
+        const TSegment& previous = segments[next - 1];
+        if (previous.second > segments[next].first) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+// Rebuilds all field accessors from the memory block [Data_, Data_ + Size_).
+//
+// Layout read here: an 8-byte size prefix, then the serialized NProto::TGeoData
+// header of that size. The header carries scalar values and, for every array,
+// its element count, its byte offset from Data_ and a CRC32C of its first
+// min(count, CRC_SIZE) elements.
+//
+// With a null Data_ the object is left empty. Throws yexception when the block
+// is too small for the size prefix or the header, the header cannot be parsed,
+// the endianness marker differs from the host, arrays overlap, a checksum does
+// not match, or the data version is not GEO_DATA_CURRENT_VERSION.
+void NReverseGeocoder::TGeoDataMap::Remap() {
+    Init();
+
+    if (!Data_)
+        return;
+
+    // The size prefix itself must fit into the block before it is read.
+    if (Size_ < sizeof(ui64))
+        ythrow yexception() << "GeoData is too small to contain header size";
+
+    // The prefix is written as a 32-bit htonl value stored in a 64-bit slot,
+    // so ntohl on the 64-bit read intentionally uses only the low 32 bits.
+    const ui64 headerSize = ntohl(ReadUnaligned<ui64>(Data_));
+
+    // Do not let the header parser read past the end of the block.
+    if (headerSize > Size_ - sizeof(ui64))
+        ythrow yexception() << "GeoData header does not fit into data";
+
+    NProto::TGeoData header;
+    if (!header.ParseFromArray(Data_ + sizeof(ui64), headerSize))
+        ythrow yexception() << "Unable parse geoData header";
+
+    if (header.GetMagic() != SYSTEM_ENDIAN_FLAG)
+        ythrow yexception() << "Different endianness in geoData and host";
+
+    if (!CheckMemoryConsistency(header))
+        ythrow yexception() << "Memory is not consistent!";
+
+// Scalar fields are copied from the header.
+#define GEO_BASE_DEF_VAR(TVar, Var) \
+    Var##_ = header.Get##Var();
+
+// Arrays are not copied: the pointer is set to Data_ + offset and the checksum
+// of the leading elements is verified. Empty arrays keep the nullptr from Init().
+// NOTE(review): array offsets are not range-checked against Size_ here.
+#define GEO_BASE_DEF_ARR(TArr, Arr) \
+    GEO_BASE_DEF_VAR(TNumber, Arr##Number); \
+    if (Arr##Number() > 0) { \
+        const intptr_t offset = header.Get##Arr(); \
+        Arr##_ = (TArr*)(((intptr_t)Data_) + offset); \
+        const ui32 hash = Crc32c(Arr##_, std::min(Arr##Number_, CRC_SIZE) * sizeof(TArr)); \
+        if (hash != header.Get##Arr##Crc32()) \
+            ythrow yexception() << "Wrong crc32 for " << #Arr; \
+    }
+
+    GEO_BASE_DEF_GEO_DATA
+
+#undef GEO_BASE_DEF_VAR
+#undef GEO_BASE_DEF_ARR
+
+    if (Version() != GEO_DATA_CURRENT_VERSION)
+        ythrow yexception() << "Unable use version " << Version()
+                            << "(current version is " << GEO_DATA_CURRENT_VERSION << ")";
+}
+
+// Returns the serialized size of a header in which the magic, every scalar field and every
+// array's count, offset and CRC32 are set to the maximum value of their type. Serialize() uses
+// this value to reserve room for the real header before the arrays are placed.
+// NOTE(review): presumably maximum values give the largest possible encoding of each field, which
+// makes this an upper bound - confirm against the proto definition.
+static size_t HeaderSize() {
+ NProto::TGeoData header;
+ header.SetMagic(std::numeric_limits<decltype(header.GetMagic())>::max());
+
+#define GEO_BASE_DEF_VAR(TVar, Var) \
+ header.Set##Var(std::numeric_limits<decltype(header.Get##Var())>::max());
+
+#define GEO_BASE_DEF_ARR(TArr, Arr) \
+ GEO_BASE_DEF_VAR(TNumber, Arr##Number); \
+ header.Set##Arr(std::numeric_limits<decltype(header.Get##Arr())>::max()); \
+ header.Set##Arr##Crc32(std::numeric_limits<decltype(header.Get##Arr##Crc32())>::max());
+
+ GEO_BASE_DEF_GEO_DATA
+
+#undef GEO_BASE_DEF_VAR
+#undef GEO_BASE_DEF_ARR
+
+ return header.ByteSize();
+}
+
+// Serializes `g` into memory obtained from `allocator` and returns a pointer to the start of the
+// serialized block. If `size` is not null it receives the number of bytes allocated from the
+// allocator during this call.
+static const char* Serialize(const IGeoData& g, TBlockAllocator* allocator, size_t* size) {
+ size_t const preAllocatedSize = allocator->TotalAllocatedSize();
+ // The first allocation holds the 8-byte size prefix followed by the header; HeaderSize() bytes
+ // are reserved for the header.
+ char* data = (char*)allocator->Allocate(HeaderSize() + sizeof(ui64));
+
+ NProto::TGeoData header;
+ header.SetMagic(SYSTEM_ENDIAN_FLAG);
+
+// Scalar fields are stored in the header itself.
+#define GEO_BASE_DEF_VAR(TVar, Var) \
+ header.Set##Var(g.Var());
+
+// Every non-empty array is copied into its own allocation; the header records the element count,
+// the offset of the copy relative to `data` and a CRC32C of its first min(count, CRC_SIZE)
+// elements. For empty arrays only the count is recorded.
+#define GEO_BASE_DEF_ARR(TArr, Arr) \
+ GEO_BASE_DEF_VAR(TNumber, Arr##Number); \
+ if (g.Arr##Number() > 0) { \
+ TArr* arr = (TArr*)allocator->Allocate(sizeof(TArr) * g.Arr##Number()); \
+ memcpy(arr, g.Arr(), sizeof(TArr) * g.Arr##Number()); \
+ header.Set##Arr((ui64)(((intptr_t)arr) - ((intptr_t)data))); \
+ header.Set##Arr##Crc32(Crc32c(arr, std::min(g.Arr##Number(), CRC_SIZE) * sizeof(TArr))); \
+ };
+
+ GEO_BASE_DEF_GEO_DATA
+
+#undef GEO_BASE_DEF_VAR
+#undef GEO_BASE_DEF_ARR
+
+ // The header is written last because it depends on the array offsets. Its size is stored as a
+ // 32-bit htonl value widened to 64 bits; Remap() reads it back with ntohl.
+ const auto str = header.SerializeAsString();
+ WriteUnaligned<ui64>(data, (ui64)htonl(str.size()));
+ memcpy(data + sizeof(ui64), str.data(), str.size());
+
+ if (size)
+ *size = allocator->TotalAllocatedSize() - preAllocatedSize;
+
+ return data;
+}
+
+// Computes how many bytes a TBlockAllocator needs to serialize `g`: the allocation size of the
+// size prefix plus header, plus the allocation size of every array. Unlike Serialize(), empty
+// arrays are also passed through TBlockAllocator::AllocateSize here.
+static size_t TotalByteSize(const IGeoData& g) {
+ size_t total_size = TBlockAllocator::AllocateSize(HeaderSize() + sizeof(ui64));
+
+// Scalar fields live in the header and add nothing.
+#define GEO_BASE_DEF_VAR(TVar, Var) \
+ // undef
+
+#define GEO_BASE_DEF_ARR(TArr, Arr) \
+ total_size += TBlockAllocator::AllocateSize(sizeof(TArr) * g.Arr##Number());
+
+ GEO_BASE_DEF_GEO_DATA
+
+#undef GEO_BASE_DEF_VAR
+#undef GEO_BASE_DEF_ARR
+
+ return total_size;
+}
+
+// Serializes `geoData` into memory taken from `allocator` and maps this object
+// onto the freshly serialized block.
+NReverseGeocoder::TGeoDataMap::TGeoDataMap(const IGeoData& geoData, TBlockAllocator* allocator)
+    : TGeoDataMap()
+{
+    size_t serializedSize = 0;
+    Data_ = Serialize(geoData, allocator, &serializedSize);
+    Size_ = serializedSize;
+    Remap();
+}
+
+// Serializes `data` and writes the result to the file at `path`. The file is
+// opened with CreateAlways | RdWr.
+void NReverseGeocoder::TGeoDataMap::SerializeToFile(const TString& path, const IGeoData& data) {
+    TBlob serialized = SerializeToBlob(data);
+
+    TFile output(path, CreateAlways | RdWr);
+    output.Write(serialized.Data(), serialized.Length());
+}
+
+// Serializes `data` into a zero-filled buffer of TotalByteSize(data) bytes and
+// returns that buffer as a blob.
+TBlob NReverseGeocoder::TGeoDataMap::SerializeToBlob(const IGeoData& data) {
+    TBuffer storage;
+    storage.Resize(TotalByteSize(data));
+    memset(storage.data(), 0, storage.size());
+
+    // The temporary map exists only for the side effect of its constructor,
+    // which serializes `data` into the allocator's buffer.
+    TBlockAllocator allocator(storage.Data(), storage.Size());
+    TGeoDataMap(data, &allocator);
+
+    return TBlob::FromBuffer(storage);
+}
diff --git a/library/cpp/reverse_geocoder/core/geo_data/map.h b/library/cpp/reverse_geocoder/core/geo_data/map.h
new file mode 100644
index 0000000000..e466bd912e
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/geo_data/map.h
@@ -0,0 +1,89 @@
+#pragma once
+
+#include "geo_data.h"
+
+#include <library/cpp/reverse_geocoder/library/block_allocator.h>
+
+#include <util/memory/blob.h>
+
+namespace NReverseGeocoder {
+ // IGeoData implementation that is a view over a serialized geo data block. The object stores a
+ // pointer to the block and its size plus, for every field of GEO_BASE_DEF_GEO_DATA, either a
+ // scalar copy or a pointer/count pair. Nothing in this class releases the memory behind Data_.
+ class TGeoDataMap: public IGeoData, public TNonCopyable {
+// For each scalar field: a public accessor and a private member holding the value.
+#define GEO_BASE_DEF_VAR(TVar, Var) \
+public: \
+ const TVar& Var() const override { \
+ return Var##_; \
+ } \
+ \
+private: \
+ TVar Var##_;
+
+// For each array: public accessors for the data pointer and the element count, and the private
+// members behind them.
+#define GEO_BASE_DEF_ARR(TArr, Arr) \
+public: \
+ const TArr* Arr() const override { \
+ return Arr##_; \
+ } \
+ TNumber Arr##Number() const override { \
+ return Arr##Number_; \
+ } \
+ \
+private: \
+ TNumber Arr##Number_; \
+ const TArr* Arr##_;
+
+ GEO_BASE_DEF_GEO_DATA
+
+#undef GEO_BASE_DEF_VAR
+#undef GEO_BASE_DEF_ARR
+
+ public:
+ // Creates an empty map without backing memory.
+ TGeoDataMap();
+
+ // Serializes `data` and writes it to the file at `path`.
+ static void SerializeToFile(const TString& path, const IGeoData& data);
+
+ // Serializes `data` into a newly created blob.
+ static TBlob SerializeToBlob(const IGeoData& data);
+
+ // Serializes `data` into memory taken from `allocator` and maps this object onto the result.
+ TGeoDataMap(const IGeoData& data, TBlockAllocator* allocator);
+
+ // Maps this object onto an already serialized block of `size` bytes at `data`.
+ TGeoDataMap(const char* data, size_t size)
+ : TGeoDataMap()
+ {
+ Data_ = data;
+ Size_ = size;
+ Remap();
+ }
+
+ // Move operations swap the backing block with `dat` and then rebuild the field accessors of
+ // both objects from their new blocks.
+ TGeoDataMap(TGeoDataMap&& dat)
+ : TGeoDataMap()
+ {
+ DoSwap(Data_, dat.Data_);
+ DoSwap(Size_, dat.Size_);
+ Remap();
+ dat.Remap();
+ }
+
+ TGeoDataMap& operator=(TGeoDataMap&& dat) {
+ DoSwap(Data_, dat.Data_);
+ DoSwap(Size_, dat.Size_);
+ Remap();
+ dat.Remap();
+ return *this;
+ }
+
+ // Start of the serialized block, or nullptr for an empty map.
+ const char* Data() const {
+ return Data_;
+ }
+
+ // Size of the serialized block in bytes.
+ size_t Size() const {
+ return Size_;
+ }
+
+ private:
+ // Resets all generated fields to their empty state.
+ void Init();
+
+ // Rebuilds all generated fields from Data_.
+ void Remap();
+
+ const char* Data_;
+ size_t Size_;
+ };
+
+}
diff --git a/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp b/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp
new file mode 100644
index 0000000000..5ff2d13783
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp
@@ -0,0 +1 @@
+#include "proxy.h"
diff --git a/library/cpp/reverse_geocoder/core/geo_data/proxy.h b/library/cpp/reverse_geocoder/core/geo_data/proxy.h
new file mode 100644
index 0000000000..fecb9fc7cf
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/geo_data/proxy.h
@@ -0,0 +1,68 @@
+#pragma once
+
+#include "geo_data.h"
+#include "map.h"
+
+#include <util/generic/ptr.h>
+#include <util/system/filemap.h>
+
+namespace NReverseGeocoder {
+    // Interface of an object that gives access to geo data.
+    class IGeoDataProxy {
+    public:
+        // Returns the geo data exposed by this proxy.
+        virtual const IGeoData* GeoData() const = 0;
+
+        virtual ~IGeoDataProxy() = default;
+    };
+
+ using TGeoDataProxyPtr = THolder<IGeoDataProxy>;
+
+ // Proxy that maps a geo data file into memory and exposes it through a TGeoDataMap.
+ class TGeoDataMapProxy: public IGeoDataProxy, public TNonCopyable {
+ public:
+ // Opens the file at `path`, maps its whole length and builds the geo data view over the
+ // mapped bytes.
+ explicit TGeoDataMapProxy(const char* path)
+ : MemFile_(path)
+ {
+ MemFile_.Map(0, MemFile_.Length());
+ // GeoData_ was default-constructed (empty) by now; move-assign the real view into it.
+ GeoData_ = TGeoDataMap((const char*)MemFile_.Ptr(), MemFile_.MappedSize());
+ }
+
+ const IGeoData* GeoData() const override {
+ return &GeoData_;
+ }
+
+ private:
+ // The mapping is declared before the view that points into it.
+ TFileMap MemFile_;
+ TGeoDataMap GeoData_;
+ };
+
+    // Proxy over geo data owned by someone else: only the address of the given
+    // object is stored, so that object must outlive the wrapper.
+    class TGeoDataWrapper: public IGeoDataProxy, public TNonCopyable {
+    public:
+        explicit TGeoDataWrapper(const IGeoData& g)
+            : GeoData_(&g)
+        {
+        }
+
+        // Returns the wrapped object.
+        const IGeoData* GeoData() const override {
+            return GeoData_;
+        }
+
+    private:
+        const IGeoData* GeoData_;
+    };
+
+    // Proxy over an already serialized geo data block in memory. The block is
+    // not copied; a TGeoDataMap view is built on top of it.
+    class TGeoDataRawProxy: public IGeoDataProxy, public TNonCopyable {
+    public:
+        // `data` points to the serialized block, `dataSize` is its size in bytes.
+        TGeoDataRawProxy(const char* data, size_t dataSize)
+            : GeoData_(data, dataSize)
+        {
+        }
+
+        // Returns the view built over the block.
+        const IGeoData* GeoData() const override {
+            return &GeoData_;
+        }
+
+    private:
+        TGeoDataMap GeoData_;
+    };
+
+}
diff --git a/library/cpp/reverse_geocoder/core/kv.cpp b/library/cpp/reverse_geocoder/core/kv.cpp
new file mode 100644
index 0000000000..a48e9c947e
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/kv.cpp
@@ -0,0 +1 @@
+#include "kv.h"
diff --git a/library/cpp/reverse_geocoder/core/kv.h b/library/cpp/reverse_geocoder/core/kv.h
new file mode 100644
index 0000000000..639c21de52
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/kv.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include "common.h"
+
+namespace NReverseGeocoder {
+ // Key-value pair. K and V are offsets into the blobs array of the geographical data. See
+ // geo_data.h for details.
+ struct TKv {
+ TNumber K;
+ TNumber V;
+ };
+
+}
diff --git a/library/cpp/reverse_geocoder/core/location.cpp b/library/cpp/reverse_geocoder/core/location.cpp
new file mode 100644
index 0000000000..b2d2f54d12
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/location.cpp
@@ -0,0 +1 @@
+#include "location.h"
diff --git a/library/cpp/reverse_geocoder/core/location.h b/library/cpp/reverse_geocoder/core/location.h
new file mode 100644
index 0000000000..5aa3198684
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/location.h
@@ -0,0 +1,21 @@
+#pragma once
+
+namespace NReverseGeocoder {
+    // Geographical location given as floating point longitude and latitude.
+    struct TLocation {
+        double Lon;
+        double Lat;
+
+        // Location with both coordinates set to zero.
+        TLocation()
+            : TLocation(0.0, 0.0)
+        {
+        }
+
+        // Location with explicitly given longitude and latitude.
+        TLocation(double lon, double lat)
+            : Lon(lon)
+            , Lat(lat)
+        {
+        }
+    };
+
+}
diff --git a/library/cpp/reverse_geocoder/core/part.cpp b/library/cpp/reverse_geocoder/core/part.cpp
new file mode 100644
index 0000000000..c973d2171a
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/part.cpp
@@ -0,0 +1,29 @@
+#include "part.h"
+
+#include <library/cpp/reverse_geocoder/library/unaligned_iter.h>
+
+#include <util/generic/algorithm.h>
+
+using namespace NReverseGeocoder;
+
+// Checks whether `point` lies on an edge of this part or under an odd number
+// of its edges. The part's edge refs are the `edgeRefsNumber` entries of
+// `edgeRefs` starting at EdgeRefsOffset; they are searched with LowerBound
+// using TEdge::Lower as the ordering.
+bool NReverseGeocoder::TPart::Contains(const TPoint& point, TNumber edgeRefsNumber, const TRef* edgeRefs,
+                                       const TEdge* edges, const TPoint* points) const {
+    auto first = UnalignedIter(edgeRefs) + EdgeRefsOffset;
+    auto last = first + edgeRefsNumber;
+
+    // True while the referenced edge lies lower than the target point.
+    auto liesBelow = [edges, points](const TRef& ref, const TPoint& target) {
+        return edges[ref].Lower(target, points);
+    };
+
+    // First edge that does not lie lower than the point.
+    auto found = LowerBound(first, last, point, liesBelow);
+
+    if (found == last) {
+        return false;
+    }
+
+    // Either the point is on the found edge, or the number of edges lower than
+    // the point decides: an odd number means the point is inside.
+    return edges[*found].Contains(point, points) || (found - first) % 2 == 1;
+}
diff --git a/library/cpp/reverse_geocoder/core/part.h b/library/cpp/reverse_geocoder/core/part.h
new file mode 100644
index 0000000000..9b24fee96f
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/part.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include "common.h"
+#include "edge.h"
+#include "point.h"
+
+namespace NReverseGeocoder {
+ // TPart contains a version of the persistent scanline. Parts are stored in the parts array of
+ // the geographical data, ordered by Coordinate for each polygon. EdgeRefsOffset refers to the
+ // position in the EdgeRefs array where the refs of this part begin. To save memory, a part does
+ // not store an "EdgeRefsNumber" value, because it can be computed as
+ // parts[i + 1].EdgeRefsOffset - parts[i].EdgeRefsOffset for every part in the geographical data.
+ // For this purpose a fake part with a correct EdgeRefsOffset is added into IGeoData. Refs in
+ // EdgeRefs are in increasing order for each part, which makes it possible to quickly determine
+ // how many edges are under a point. See generator/ for details.
+ struct Y_PACKED TPart {
+ TCoordinate Coordinate;
+ TNumber EdgeRefsOffset;
+
+ // Checks that the point lies under an odd number of edges or on an edge.
+ bool Contains(const TPoint& point, TNumber edgeRefsNumber, const TRef* edgeRefs,
+ const TEdge* edges, const TPoint* points) const;
+ };
+
+ static_assert(sizeof(TPart) == 8, "NReverseGeocoder::TPart size mismatch");
+
+}
diff --git a/library/cpp/reverse_geocoder/core/point.cpp b/library/cpp/reverse_geocoder/core/point.cpp
new file mode 100644
index 0000000000..396e27e596
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/point.cpp
@@ -0,0 +1 @@
+#include "point.h"
diff --git a/library/cpp/reverse_geocoder/core/point.h b/library/cpp/reverse_geocoder/core/point.h
new file mode 100644
index 0000000000..75f1dfc1b4
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/point.h
@@ -0,0 +1,52 @@
+#pragma once
+
+#include "common.h"
+#include "location.h"
+
+namespace NReverseGeocoder {
+ struct Y_PACKED TPoint {
+ TCoordinate X;
+ TCoordinate Y;
+
+ TPoint()
+ : X(0)
+ , Y(0)
+ {
+ }
+
+ TPoint(const TCoordinate& x1, const TCoordinate& y1)
+ : X(x1)
+ , Y(y1)
+ {
+ }
+
+ explicit TPoint(const TLocation& l)
+ : X(ToCoordinate(l.Lon))
+ , Y(ToCoordinate(l.Lat))
+ {
+ }
+
+ TPoint operator-(const TPoint& p) const {
+ return TPoint(X - p.X, Y - p.Y);
+ }
+
+ bool operator==(const TPoint& b) const {
+ return X == b.X && Y == b.Y;
+ }
+
+ bool operator!=(const TPoint& b) const {
+ return X != b.X || Y != b.Y;
+ }
+
+ bool operator<(const TPoint& b) const {
+ return X < b.X || (X == b.X && Y < b.Y);
+ }
+
+ TSquare Cross(const TPoint& p) const {
+ return 1ll * X * p.Y - 1ll * Y * p.X;
+ }
+ };
+
+ static_assert(sizeof(TPoint) == 8, "NReverseGeocoder::TPoint size mismatch");
+
+}
diff --git a/library/cpp/reverse_geocoder/core/polygon.cpp b/library/cpp/reverse_geocoder/core/polygon.cpp
new file mode 100644
index 0000000000..2baac2d229
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/polygon.cpp
@@ -0,0 +1,91 @@
+#include "polygon.h"
+
+#include <util/generic/algorithm.h>
+
+using namespace NReverseGeocoder;
+
+static bool Check(const TPart* part, const TPoint& point, const TRef* edgeRefs,
+                  const TEdge* edges, const TPoint* points) {
+    const TPart& next = part[1]; // this part's edge refs end where the next part's begin
+    return part->Contains(point, next.EdgeRefsOffset - part->EdgeRefsOffset, edgeRefs, edges, points);
+}
+
+bool NReverseGeocoder::TPolygon::Contains(const TPoint& point, const TPart* parts, const TRef* edgeRefs,
+ const TEdge* edges, const TPoint* points) const {
+ if (!Bbox.Contains(point))
+ return false;
+
+ parts += PartsOffset;
+ const TPart* partsEnd = parts + PartsNumber;
+
+ // Find the lower-bound part that can contain the given point.
+ const TPart* part = LowerBound(parts, partsEnd, point, [&](const TPart& a, const TPoint& b) {
+ return a.Coordinate < b.X;
+ });
+
+ if (part->Coordinate > point.X) {
+ if (part == parts)
+ return false;
+ --part;
+ }
+
+ if (point.X < part->Coordinate || point.X > (part + 1)->Coordinate)
+ return false;
+
+ if (point.X == part->Coordinate)
+ if (part != parts && Check(part - 1, point, edgeRefs, edges, points))
+ return true;
+
+ return Check(part, point, edgeRefs, edges, points);
+}
+
+bool NReverseGeocoder::TPolygonBase::Better(const TPolygonBase& p, const TRegion* regions,
+                                            TNumber regionsNumber) const {
+    // Different squares decide immediately: the polygon with the smaller square is better.
+    if (Square != p.Square)
+        return Square < p.Square;
+
+    // Equal squares: fall back to comparing the regions that own the two polygons.
+    const TRegion* first = regions;
+    const TRegion* last = regions + regionsNumber;
+
+    const TRegion* mine = LowerBound(first, last, TGeoId(RegionId));
+    const TRegion* theirs = LowerBound(first, last, TGeoId(p.RegionId));
+
+    // LowerBound only yields an insertion position, so confirm that each region is
+    // really present before comparing; a missing region means "not better".
+    const bool mineFound = mine != last && mine->RegionId == RegionId;
+    if (!mineFound)
+        return false;
+    const bool theirsFound = theirs != last && theirs->RegionId == p.RegionId;
+    if (!theirsFound)
+        return false;
+    return mine->Better(*theirs);
+}
+
+bool NReverseGeocoder::TRawPolygon::Contains(const TPoint& point, const TRef* edgeRefs, const TEdge* edges,
+ const TPoint* points) const {
+ if (!Bbox.Contains(point))
+ return false;
+
+ edgeRefs += EdgeRefsOffset;
+
+ TNumber intersections = 0;
+ for (TNumber i = 0; i < EdgeRefsNumber; ++i) {
+ const TEdge& e = edges[edgeRefs[i]];
+
+ if (e.Contains(point, points))
+ return true;
+
+ TPoint a = points[e.Beg];
+ TPoint b = points[e.End];
+
+ if (a.X > b.X)
+ DoSwap(a, b);
+
+ if (a.X < point.X && b.X >= point.X && e.Lower(point, points))
+ ++intersections;
+ }
+
+ return intersections % 2 == 1;
+}
diff --git a/library/cpp/reverse_geocoder/core/polygon.h b/library/cpp/reverse_geocoder/core/polygon.h
new file mode 100644
index 0000000000..065bba1e38
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/polygon.h
@@ -0,0 +1,73 @@
+#pragma once
+
+#include "bbox.h"
+#include "common.h"
+#include "edge.h"
+#include "part.h"
+#include "point.h"
+#include "region.h"
+
+namespace NReverseGeocoder {
+#pragma pack(push, 1)
+
+ struct TPolygonBase {
+ enum EType {
+ TYPE_UNKNOWN = 0,
+ TYPE_INNER = 1,
+ TYPE_OUTER = 2,
+ };
+
+ // If the type is TYPE_INNER and the polygon contains the given point, the region with
+ // RegionId does not contain the point.
+ EType Type;
+
+ ui32 Unused1;
+
+ // Geographical data identifiers.
+ TGeoId RegionId;
+ TGeoId PolygonId;
+
+ // Rectangle in which lies that polygon.
+ TBoundingBox Bbox;
+
+ // Square of the polygon. Needed to determine which polygon is better. See the Better member function.
+ TSquare Square;
+
+ // Total points number of given polygon.
+ TNumber PointsNumber;
+
+ // Checks whether this polygon is better than the given polygon, which means that this polygon
+ // lies deeper than the given one in the polygon hierarchy.
+ bool Better(const TPolygonBase& p, const TRegion* regions, TNumber regionsNumber) const;
+ };
+
+ // Polygon is a representation of persistent scanline data structure.
+ struct TPolygon: public TPolygonBase {
+ // Versions of persistent scanline.
+ TNumber PartsOffset;
+ TNumber PartsNumber;
+ ui32 Unused2;
+
+ // Fast point-in-polygon test using the persistent scanline. You can see how this data
+ // structure is generated in generator/.
+ bool Contains(const TPoint& point, const TPart* parts, const TRef* edgeRefs,
+ const TEdge* edges, const TPoint* points) const;
+ };
+
+ static_assert(sizeof(TPolygon) == 64, "NReverseGeocoder::TPolygon size mismatch");
+
+ // Raw polygon is a polygon representation for slow tests.
+ struct TRawPolygon: public TPolygonBase {
+ // Raw polygon edge refs.
+ TNumber EdgeRefsOffset;
+ TNumber EdgeRefsNumber;
+ ui32 Unused2;
+
+ bool Contains(const TPoint& point, const TRef* edgeRefs, const TEdge* edges,
+ const TPoint* points) const;
+ };
+
+ static_assert(sizeof(TRawPolygon) == 64, "NReverseGeocoder::TRawPolygon size mismatch");
+
+#pragma pack(pop)
+}
diff --git a/library/cpp/reverse_geocoder/core/region.cpp b/library/cpp/reverse_geocoder/core/region.cpp
new file mode 100644
index 0000000000..62b4acd0a1
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/region.cpp
@@ -0,0 +1 @@
+#include "region.h"
diff --git a/library/cpp/reverse_geocoder/core/region.h b/library/cpp/reverse_geocoder/core/region.h
new file mode 100644
index 0000000000..4b010c7103
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/region.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include "common.h"
+
+namespace NReverseGeocoder {
+ struct Y_PACKED TRegion {
+ TGeoId RegionId;
+ TNumber KvsOffset;
+ TNumber KvsNumber;
+ TSquare Square;
+ TNumber PolygonsNumber;
+ ui32 Unused;
+
+ bool operator==(const TRegion& r) const {
+ return RegionId == r.RegionId;
+ }
+
+ bool operator<(const TRegion& r) const {
+ return RegionId < r.RegionId;
+ }
+
+ bool operator<(const TGeoId& r) const {
+ return RegionId < r;
+ }
+
+ friend bool operator<(const TGeoId& regionId, const TRegion& r) {
+ return regionId < r.RegionId;
+ }
+
+ bool Better(const TRegion& r) const {
+ return Square < r.Square;
+ }
+ };
+
+ static_assert(sizeof(TRegion) == 32, "NReverseGeocoder::TRegion size mismatch");
+
+}
diff --git a/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp b/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp
new file mode 100644
index 0000000000..d73e4f2648
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp
@@ -0,0 +1,182 @@
+#include "reverse_geocoder.h"
+#include "geo_data/geo_data.h"
+
+#include <library/cpp/reverse_geocoder/library/unaligned_iter.h>
+
+#include <util/generic/algorithm.h>
+#include <util/system/unaligned_mem.h>
+
+using namespace NReverseGeocoder;
+
+static bool PolygonContains(const TPolygon& p, const TPoint& point, const IGeoData& geoData) {
+ const TPart* parts = geoData.Parts();
+ const TRef* edgeRefs = geoData.EdgeRefs();
+ const TEdge* edges = geoData.Edges();
+ const TPoint* points = geoData.Points();
+ return p.Contains(point, parts, edgeRefs, edges, points);
+}
+
+template <typename TAnswer>
+static void UpdateAnswer(const TAnswer** answer, const TAnswer& polygon,
+                         const IGeoData& geoData) {
+    const TAnswer* current = *answer; // null until the first candidate has been accepted
+    if (current) {
+        const TRegion* regions = geoData.Regions();
+        const TNumber regionsNumber = geoData.RegionsNumber();
+        if (current->Better(polygon, regions, regionsNumber))
+            return; // the current answer wins, so keep it
+    }
+    *answer = &polygon;
+}
+
+static void SortDebug(TReverseGeocoder::TDebug* debug, const IGeoData& geoData) {
+    const TRegion* begin = geoData.Regions();
+    const TRegion* end = begin + geoData.RegionsNumber();
+    // Resolve an id to its region; nullptr when the table has no exact entry for it.
+    auto find = [&](const TGeoId& id) {
+        const TRegion* r = LowerBound(begin, end, id);
+        return (r != end && r->RegionId == id) ? r : nullptr;
+    };
+    auto better = [](const TRegion* x, const TRegion* y) { return x && (!y || x->Better(*y)); };
+    auto cmp = [&](const TGeoId& a, const TGeoId& b) { return better(find(a), find(b)); }; // unknown ids sort last
+    Sort(debug->begin(), debug->end(), cmp);
+}
+
+TGeoId NReverseGeocoder::TReverseGeocoder::Lookup(const TLocation& location, TDebug* debug) const {
+ const IGeoData& geoData = *GeoDataProxy_->GeoData();
+
+ if (debug)
+ debug->clear();
+
+ const TPoint point(location);
+ const TRef boxRef = LookupAreaBox(point);
+
+ if (boxRef >= geoData.BoxesNumber())
+ return UNKNOWN_GEO_ID;
+
+ const TNumber refsOffset = geoData.Boxes()[boxRef].PolygonRefsOffset;
+ const TNumber refsNumber = geoData.Boxes()[boxRef].PolygonRefsNumber;
+
+ const TPolygon* answer = nullptr;
+
+ const TPolygon* p = geoData.Polygons();
+ const auto refsBegin = UnalignedIter(geoData.PolygonRefs()) + refsOffset;
+ const auto refsEnd = refsBegin + refsNumber;
+
+ for (auto iterL = refsBegin, iterR = refsBegin; iterL < refsEnd; iterL = iterR) {
+ iterR = iterL + 1;
+
+ if (PolygonContains(p[*iterL], point, geoData)) {
+ if (p[*iterL].Type == TPolygon::TYPE_INNER) {
+ // All polygons with the same RegionId must be skipped if the polygon is inner.
+ // In geoData small inner polygons are stored before big outer polygons.
+ while (iterR < refsEnd && p[*iterL].RegionId == p[*iterR].RegionId)
+ ++iterR;
+
+ } else {
+ UpdateAnswer(&answer, p[*iterL], geoData);
+
+ if (debug)
+ debug->push_back(p[*iterL].RegionId);
+
+ while (iterR < refsEnd && p[*iterL].RegionId == p[*iterR].RegionId)
+ ++iterR;
+ }
+ }
+ }
+
+ if (debug)
+ SortDebug(debug, geoData);
+
+ return answer ? answer->RegionId : UNKNOWN_GEO_ID;
+}
+
+TGeoId NReverseGeocoder::TReverseGeocoder::RawLookup(const TLocation& location, TDebug* debug) const {
+ const IGeoData& geoData = *GeoDataProxy_->GeoData();
+
+ if (debug)
+ debug->clear();
+
+ const TPoint point(location);
+
+ const TRawPolygon* borders = geoData.RawPolygons();
+ const TNumber bordersNumber = geoData.RawPolygonsNumber();
+
+ const TRawPolygon* answer = nullptr;
+
+ TNumber i = 0;
+ while (i < bordersNumber) {
+ if (borders[i].Contains(point, geoData.RawEdgeRefs(), geoData.Edges(), geoData.Points())) {
+ if (borders[i].Type == TRawPolygon::TYPE_INNER) {
+ TNumber j = i + 1;
+ while (j < bordersNumber && borders[i].RegionId == borders[j].RegionId)
+ ++j;
+
+ i = j;
+
+ } else {
+ UpdateAnswer(&answer, borders[i], geoData);
+
+ if (debug)
+ debug->push_back(borders[i].RegionId);
+
+ TNumber j = i + 1;
+ while (j < bordersNumber && borders[i].RegionId == borders[j].RegionId)
+ ++j;
+
+ i = j;
+ }
+ } else {
+ ++i;
+ }
+ }
+
+ if (debug)
+ SortDebug(debug, geoData);
+
+ return answer ? answer->RegionId : UNKNOWN_GEO_ID;
+}
+
+bool NReverseGeocoder::TReverseGeocoder::EachKv(TGeoId regionId, TKvCallback callback) const {
+ const IGeoData& g = *GeoDataProxy_->GeoData();
+
+ const TRegion* begin = g.Regions();
+ const TRegion* end = begin + g.RegionsNumber();
+
+ const TRegion* region = LowerBound(begin, end, regionId);
+
+ if (region == end || region->RegionId != regionId)
+ return false;
+
+ const TKv* kvs = g.Kvs() + region->KvsOffset;
+ const char* blobs = g.Blobs();
+
+ for (TNumber i = 0; i < region->KvsNumber; ++i) {
+ const char* k = blobs + kvs[i].K;
+ const char* v = blobs + kvs[i].V;
+ callback(k, v);
+ }
+
+ return true;
+}
+
+void NReverseGeocoder::TReverseGeocoder::EachPolygon(TPolygonCallback callback) const {
+ const IGeoData& g = *GeoDataProxy_->GeoData();
+
+ for (TNumber i = 0; i < g.PolygonsNumber(); ++i)
+ callback(g.Polygons()[i]);
+}
+
+void NReverseGeocoder::TReverseGeocoder::EachPart(const TPolygon& polygon, TPartCallback callback) const {
+    const IGeoData& geoData = *GeoDataProxy_->GeoData();
+    const TNumber first = polygon.PartsOffset;
+    const TNumber count = polygon.PartsNumber;
+
+    // Visit every part of the polygon; a part's edge-ref count is the distance from its
+    // EdgeRefsOffset to the EdgeRefsOffset of the part that follows it.
+    for (TNumber index = first; index < first + count; ++index) {
+        const TPart* current = geoData.Parts() + index;
+        const TPart* following = geoData.Parts() + (index + 1);
+        callback(*current, following->EdgeRefsOffset - current->EdgeRefsOffset);
+    }
+}
diff --git a/library/cpp/reverse_geocoder/core/reverse_geocoder.h b/library/cpp/reverse_geocoder/core/reverse_geocoder.h
new file mode 100644
index 0000000000..c74eddb40e
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/reverse_geocoder.h
@@ -0,0 +1,73 @@
+#pragma once
+
+#include "common.h"
+#include "geo_data/geo_data.h"
+#include "geo_data/proxy.h"
+
+#include <util/generic/noncopyable.h>
+#include <util/generic/vector.h>
+
+#include <functional>
+
+namespace NReverseGeocoder {
+ const TGeoId UNKNOWN_GEO_ID = static_cast<TGeoId>(-1);
+
+ // NOTE: Be careful! It works fine and fast on a real-world dataset.
+ // But in theory it can spend O(n^2) memory (on a real-world dataset it's just 6n).
+ // The point-in-polygon test will always be O(log n). Memory spent will be O(n) in the future!
+ class TReverseGeocoder: public TNonCopyable {
+ public:
+ using TDebug = TVector<TGeoId>;
+ using TKvCallback = std::function<void(const char*, const char*)>;
+ using TPolygonCallback = std::function<void(const TPolygon&)>;
+ using TPartCallback = std::function<void(const TPart&, TNumber)>;
+
+ TReverseGeocoder()
+ : GeoDataProxy_()
+ {
+ }
+
+ TReverseGeocoder(TReverseGeocoder&& g)
+ : GeoDataProxy_()
+ {
+ DoSwap(GeoDataProxy_, g.GeoDataProxy_);
+ }
+
+ TReverseGeocoder& operator=(TReverseGeocoder&& g) {
+ DoSwap(GeoDataProxy_, g.GeoDataProxy_);
+ return *this;
+ }
+
+ explicit TReverseGeocoder(const char* path)
+ : GeoDataProxy_(new TGeoDataMapProxy(path))
+ {
+ }
+
+ explicit TReverseGeocoder(const IGeoData& geoData)
+ : GeoDataProxy_(new TGeoDataWrapper(geoData))
+ {
+ }
+
+ TReverseGeocoder(const char* data, size_t dataSize)
+ : GeoDataProxy_(new TGeoDataRawProxy(data, dataSize))
+ {
+ }
+
+ TGeoId Lookup(const TLocation& location, TDebug* debug = nullptr) const;
+
+ TGeoId RawLookup(const TLocation& location, TDebug* debug = nullptr) const;
+
+ bool EachKv(TGeoId regionId, TKvCallback callback) const;
+
+ void EachPolygon(TPolygonCallback callback) const;
+
+ void EachPart(const TPolygon& polygon, TPartCallback callback) const;
+
+ const IGeoData& GeoData() const {
+ return *GeoDataProxy_->GeoData();
+ }
+
+ private:
+ TGeoDataProxyPtr GeoDataProxy_;
+ };
+}
diff --git a/library/cpp/reverse_geocoder/core/ya.make b/library/cpp/reverse_geocoder/core/ya.make
new file mode 100644
index 0000000000..9f7dc67464
--- /dev/null
+++ b/library/cpp/reverse_geocoder/core/ya.make
@@ -0,0 +1,28 @@
+LIBRARY()
+
+PEERDIR(
+ library/cpp/reverse_geocoder/library
+ library/cpp/reverse_geocoder/proto
+ library/cpp/digest/crc32c
+)
+
+SRCS(
+ area_box.cpp
+ bbox.cpp
+ common.cpp
+ edge.cpp
+ reverse_geocoder.cpp
+ kv.cpp
+ location.cpp
+ part.cpp
+ point.cpp
+ polygon.cpp
+ region.cpp
+ geo_data/debug.cpp
+ geo_data/def.cpp
+ geo_data/geo_data.cpp
+ geo_data/map.cpp
+ geo_data/proxy.cpp
+)
+
+END()
diff --git a/library/cpp/reverse_geocoder/library/CMakeLists.darwin-x86_64.txt b/library/cpp/reverse_geocoder/library/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..f82b4b8cd1
--- /dev/null
+++ b/library/cpp/reverse_geocoder/library/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,21 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-reverse_geocoder-library)
+target_link_libraries(cpp-reverse_geocoder-library PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+)
+target_sources(cpp-reverse_geocoder-library PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/block_allocator.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/fs.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/log.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/pool_allocator.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/unaligned_iter.cpp
+)
diff --git a/library/cpp/reverse_geocoder/library/CMakeLists.linux-aarch64.txt b/library/cpp/reverse_geocoder/library/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..4b45fce452
--- /dev/null
+++ b/library/cpp/reverse_geocoder/library/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,22 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-reverse_geocoder-library)
+target_link_libraries(cpp-reverse_geocoder-library PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+)
+target_sources(cpp-reverse_geocoder-library PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/block_allocator.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/fs.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/log.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/pool_allocator.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/unaligned_iter.cpp
+)
diff --git a/library/cpp/reverse_geocoder/library/CMakeLists.linux-x86_64.txt b/library/cpp/reverse_geocoder/library/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..4b45fce452
--- /dev/null
+++ b/library/cpp/reverse_geocoder/library/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,22 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-reverse_geocoder-library)
+target_link_libraries(cpp-reverse_geocoder-library PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+)
+target_sources(cpp-reverse_geocoder-library PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/block_allocator.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/fs.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/log.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/pool_allocator.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/unaligned_iter.cpp
+)
diff --git a/library/cpp/reverse_geocoder/library/CMakeLists.txt b/library/cpp/reverse_geocoder/library/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/library/cpp/reverse_geocoder/library/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/library/cpp/reverse_geocoder/library/CMakeLists.windows-x86_64.txt b/library/cpp/reverse_geocoder/library/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..f82b4b8cd1
--- /dev/null
+++ b/library/cpp/reverse_geocoder/library/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,21 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-reverse_geocoder-library)
+target_link_libraries(cpp-reverse_geocoder-library PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+)
+target_sources(cpp-reverse_geocoder-library PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/block_allocator.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/fs.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/log.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/pool_allocator.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/unaligned_iter.cpp
+)
diff --git a/library/cpp/reverse_geocoder/library/block_allocator.cpp b/library/cpp/reverse_geocoder/library/block_allocator.cpp
new file mode 100644
index 0000000000..56f61dc566
--- /dev/null
+++ b/library/cpp/reverse_geocoder/library/block_allocator.cpp
@@ -0,0 +1,40 @@
+#include "block_allocator.h"
+
+using namespace NReverseGeocoder;
+
+static size_t const MEMORY_IS_USED_FLAG = ~0ull;
+static size_t const SIZEOF_SIZE = AlignMemory(sizeof(size_t));
+
+void* NReverseGeocoder::TBlockAllocator::Allocate(size_t number) {
+    const size_t aligned = AlignMemory(number); // payload size rounded up to MEMORY_ALIGNMENT
+    const size_t available = BytesLimit_ - BytesAllocated_;
+    // Overflow-free form of "BytesAllocated_ + aligned + SIZEOF_SIZE > BytesLimit_"; aligned < number means the rounding wrapped.
+    if (aligned < number || available < SIZEOF_SIZE || aligned > available - SIZEOF_SIZE)
+        ythrow yexception() << "Unable allocate memory";
+    char* begin = ((char*)Data_) + BytesAllocated_;
+    *((size_t*)(begin + aligned)) = MEMORY_IS_USED_FLAG; // trailer word marks the block as in use
+    BytesAllocated_ += aligned + SIZEOF_SIZE;
+    return begin;
+}
+
+size_t NReverseGeocoder::TBlockAllocator::AllocateSize(size_t number) {
+ return AlignMemory(number) + SIZEOF_SIZE;
+}
+
+static void RelaxBlock(char* begin, size_t* number) {
+ while (*number > 0) {
+ char* ptr = begin + *number - SIZEOF_SIZE;
+ if (*((size_t*)ptr) == MEMORY_IS_USED_FLAG)
+ return;
+ *number -= *((size_t*)ptr) + SIZEOF_SIZE;
+ }
+}
+
+void NReverseGeocoder::TBlockAllocator::Deallocate(void* ptr, size_t number) {
+ number = AlignMemory(number);
+ char* begin = (char*)ptr;
+ char* end = begin + number;
+ if (*((size_t*)end) != MEMORY_IS_USED_FLAG)
+ ythrow yexception() << "Trying to deallocate not allocated pointer " << ptr;
+ *((size_t*)end) = number;
+ RelaxBlock((char*)Data_, &BytesAllocated_);
+}
diff --git a/library/cpp/reverse_geocoder/library/block_allocator.h b/library/cpp/reverse_geocoder/library/block_allocator.h
new file mode 100644
index 0000000000..1189d6b25c
--- /dev/null
+++ b/library/cpp/reverse_geocoder/library/block_allocator.h
@@ -0,0 +1,64 @@
+#pragma once
+
+#include "memory.h"
+
+#include <util/generic/yexception.h>
+
+namespace NReverseGeocoder {
+ class TBlockAllocator: public TNonCopyable {
+ public:
+ TBlockAllocator()
+ : Data_(nullptr)
+ , BytesAllocated_(0)
+ , BytesLimit_(0)
+ {
+ }
+
+ TBlockAllocator(void* data, size_t bytesLimit)
+ : Data_(data)
+ , BytesAllocated_(0)
+ , BytesLimit_(bytesLimit)
+ {
+ }
+
+ TBlockAllocator(TBlockAllocator&& a)
+ : TBlockAllocator()
+ {
+ DoSwap(Data_, a.Data_);
+ DoSwap(BytesAllocated_, a.BytesAllocated_);
+ DoSwap(BytesLimit_, a.BytesLimit_);
+ }
+
+ TBlockAllocator& operator=(TBlockAllocator&& a) {
+ DoSwap(Data_, a.Data_);
+ DoSwap(BytesAllocated_, a.BytesAllocated_);
+ DoSwap(BytesLimit_, a.BytesLimit_);
+ return *this;
+ }
+
+ virtual ~TBlockAllocator() {
+ }
+
+ virtual void* Allocate(size_t number);
+
+ static size_t AllocateSize(size_t number);
+
+ virtual void Deallocate(void* ptr, size_t number);
+
+ size_t TotalAllocatedSize() const {
+ return BytesAllocated_;
+ }
+
+ void Setup(void* data, size_t bytesLimit) {
+ Data_ = data;
+ BytesLimit_ = bytesLimit;
+ BytesAllocated_ = 0;
+ }
+
+ private:
+ void* Data_;
+ size_t BytesAllocated_;
+ size_t BytesLimit_;
+ };
+
+}
diff --git a/library/cpp/reverse_geocoder/library/fs.cpp b/library/cpp/reverse_geocoder/library/fs.cpp
new file mode 100644
index 0000000000..98c3b9ef81
--- /dev/null
+++ b/library/cpp/reverse_geocoder/library/fs.cpp
@@ -0,0 +1,18 @@
+#include "fs.h"
+
+#include <util/folder/dirut.h>
+#include <util/string/split.h>
+
+namespace NReverseGeocoder {
+ TVector<TString> GetDataFilesList(const char* input) {
+ if (IsDir(input)) {
+ return GetFileListInDirectory<TVector<TString>>(input);
+ }
+
+ TVector<TString> result;
+ for (const auto& partIt : StringSplitter(input).Split(',')) {
+ result.push_back(TString(partIt.Token()));
+ }
+ return result;
+ }
+}
diff --git a/library/cpp/reverse_geocoder/library/fs.h b/library/cpp/reverse_geocoder/library/fs.h
new file mode 100644
index 0000000000..4435f960c8
--- /dev/null
+++ b/library/cpp/reverse_geocoder/library/fs.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include <util/folder/iterator.h>
+#include <util/string/vector.h>
+
+namespace NReverseGeocoder {
+ template <typename Cont>
+ Cont GetFileListInDirectory(const char* dirName) {
+ TDirIterator dirIt(dirName, TDirIterator::TOptions(FTS_LOGICAL));
+ Cont dirContent;
+ for (auto file = dirIt.begin(); file != dirIt.end(); ++file) {
+ if (strcmp(file->fts_path, dirName))
+ dirContent.push_back(file->fts_path);
+ }
+ return dirContent;
+ }
+
+ TVector<TString> GetDataFilesList(const char* input);
+}
diff --git a/library/cpp/reverse_geocoder/library/log.cpp b/library/cpp/reverse_geocoder/library/log.cpp
new file mode 100644
index 0000000000..44e6ddf287
--- /dev/null
+++ b/library/cpp/reverse_geocoder/library/log.cpp
@@ -0,0 +1,111 @@
+#include "log.h"
+
+#include <util/datetime/systime.h>
+#include <util/generic/yexception.h>
+#include <util/system/guard.h>
+#include <util/system/mutex.h>
+
+using namespace NReverseGeocoder;
+
+static size_t const TIMESTAMP_LIMIT = 32;
+
+class TLogger { // Logging sink with a private constructor; the single instance is obtained through Inst().
+public:
+    static TLogger& Inst() {
+        static TLogger logger;
+        return logger;
+    }
+
+    void Setup(IOutputStream& out, ELogLevel level) {
+        Out_ = &out;
+        Level_ = level;
+    }
+
+    void Write(ELogLevel level, const char* message) {
+        if (Out_ && level <= Level_) { // Out_ stays null until Setup() has been called
+            TGuard<TMutex> guard(Lock_); // serialize writes to the shared stream
+            Out_->Write(message, strlen(message));
+        }
+    }
+
+    IOutputStream& OutputStream() const {
+        return *Out_; // NOTE(review): only valid after Setup(); Out_ is null before that
+    }
+
+    ELogLevel Level() const {
+        return Level_;
+    }
+
+private:
+    TLogger()
+        : Out_()
+        , Level_(LOG_LEVEL_DISABLE)
+    {
+    }
+
+    IOutputStream* Out_;
+    ELogLevel Level_;
+    TMutex Lock_;
+};
+
+ELogLevel NReverseGeocoder::LogLevel() {
+ return TLogger::Inst().Level();
+}
+
+void NReverseGeocoder::LogSetup(IOutputStream& out, ELogLevel level) {
+ TLogger::Inst().Setup(out, level);
+}
+
+IOutputStream& NReverseGeocoder::LogOutputStream() {
+ return TLogger::Inst().OutputStream();
+}
+
+static const char* T(char* buffer) {
+ struct timeval timeVal;
+ gettimeofday(&timeVal, nullptr);
+
+ struct tm timeInfo;
+ const time_t sec = timeVal.tv_sec;
+ localtime_r(&sec, &timeInfo);
+
+ snprintf(buffer, TIMESTAMP_LIMIT, "%02d:%02d:%02d.%06d",
+ timeInfo.tm_hour, timeInfo.tm_min, timeInfo.tm_sec, (int)timeVal.tv_usec);
+
+ return buffer;
+}
+
+void NReverseGeocoder::LogWrite(ELogLevel level, const char* message) {
+ if (level > LogLevel())
+ return;
+
+ static const char* A[LOG_LEVEL_COUNT] = {
+ "", // LOG_LEVEL_DISABLE
+ "\033[90m", // LOG_LEVEL_ERROR
+ "\033[90m", // LOG_LEVEL_WARNING
+ "\033[90m", // LOG_LEVEL_INFO
+ "\033[90m", // LOG_LEVEL_DEBUG
+ };
+
+ static const char* B[LOG_LEVEL_COUNT] = {
+ "", // LOG_LEVEL_DISABLE
+ "\033[31;1mError\033[0m", // LOG_LEVEL_ERROR
+ "\033[33;1mWarn\033[0m", // LOG_LEVEL_WARNING
+ "\033[32;1mInfo\033[0m", // LOG_LEVEL_INFO
+ "Debug", // LOG_LEVEL_DEBUG
+ };
+
+ static const char* C[LOG_LEVEL_COUNT] = {
+ "", // LOG_LEVEL_DISABLE
+ "\n", // LOG_LEVEL_ERROR
+ "\n", // LOG_LEVEL_WARNING
+ "\n", // LOG_LEVEL_INFO
+ "\033[0m\n", // LOG_LEVEL_DEBUG
+ };
+
+ char buffer[LOG_MESSAGE_LIMIT], tbuffer[TIMESTAMP_LIMIT];
+ // Ignore logger snprintf errors.
+ snprintf(buffer, LOG_MESSAGE_LIMIT, "%s(%s) %s: %s%s",
+ A[level], T(tbuffer), B[level], message, C[level]);
+
+ TLogger::Inst().Write(level, buffer);
+}
diff --git a/library/cpp/reverse_geocoder/library/log.h b/library/cpp/reverse_geocoder/library/log.h
new file mode 100644
index 0000000000..44cb0cefcf
--- /dev/null
+++ b/library/cpp/reverse_geocoder/library/log.h
@@ -0,0 +1,65 @@
+#pragma once
+
+#include <util/generic/yexception.h>
+#include <util/stream/output.h>
+
+#include <cstdio>
+
+namespace NReverseGeocoder {
+ size_t const LOG_MESSAGE_LIMIT = 1024;
+
+ enum ELogLevel {
+ LOG_LEVEL_DISABLE = 0,
+ LOG_LEVEL_ERROR,
+ LOG_LEVEL_WARNING,
+ LOG_LEVEL_INFO,
+ LOG_LEVEL_DEBUG,
+ LOG_LEVEL_COUNT
+ };
+
+ // Init logger. Setup OutputStream and logger level.
+ void LogSetup(IOutputStream& out, ELogLevel level);
+
+ // Write log message with colors, level and current time.
+ // Example:
+ // (13:24:11.123456) Info: Good job!
+ // (13:24:11.323456) Warn: Ooops :(
+ // (13:24:22.456789) Error: Hello, world!
+ void LogWrite(ELogLevel level, const char* message);
+
+ // Log output stream.
+ IOutputStream& LogOutputStream();
+
+ // Current log level.
+ ELogLevel LogLevel();
+
+ template <typename... TArgs>
+ void LogWrite(ELogLevel level, const char* fmt, TArgs... args) {
+ if (level <= LogLevel()) {
+ char buffer[LOG_MESSAGE_LIMIT];
+ // Ignore logger snprintf errors.
+ snprintf(buffer, LOG_MESSAGE_LIMIT, fmt, std::forward<TArgs>(args)...);
+ LogWrite(level, buffer);
+ }
+ }
+
+ template <typename... TArgs>
+ void LogError(TArgs... args) {
+ LogWrite(LOG_LEVEL_ERROR, std::forward<TArgs>(args)...);
+ }
+
+ template <typename... TArgs>
+ void LogWarning(TArgs... args) {
+ LogWrite(LOG_LEVEL_WARNING, std::forward<TArgs>(args)...);
+ }
+
+ template <typename... TArgs>
+ void LogInfo(TArgs... args) {
+ LogWrite(LOG_LEVEL_INFO, std::forward<TArgs>(args)...);
+ }
+
+ template <typename... TArgs>
+ void LogDebug(TArgs... args) {
+ LogWrite(LOG_LEVEL_DEBUG, std::forward<TArgs>(args)...);
+ }
+}
diff --git a/library/cpp/reverse_geocoder/library/memory.h b/library/cpp/reverse_geocoder/library/memory.h
new file mode 100644
index 0000000000..ecbe8bcd66
--- /dev/null
+++ b/library/cpp/reverse_geocoder/library/memory.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <util/system/types.h>
+
+namespace NReverseGeocoder {
+ constexpr ui64 B = 1ull;
+ constexpr ui64 KB = 1024 * B;
+ constexpr ui64 MB = 1024 * KB;
+ constexpr ui64 GB = 1024 * MB;
+
+ constexpr size_t MEMORY_ALIGNMENT = 16ull;
+
+ inline unsigned long long AlignMemory(unsigned long long x) {
+     // Round x up to the next multiple of MEMORY_ALIGNMENT; aligned values are returned as is.
+     const unsigned long long remainder = x % MEMORY_ALIGNMENT;
+     return remainder == 0 ? x : x + MEMORY_ALIGNMENT - remainder;
+ }
+
+ inline bool IsAlignedMemory(const void* ptr) { // only the address is inspected, so const data is accepted too
+     return ((uintptr_t)ptr) % MEMORY_ALIGNMENT == 0; // true when the address is a multiple of MEMORY_ALIGNMENT
+ }
+
+}
diff --git a/library/cpp/reverse_geocoder/library/pool_allocator.cpp b/library/cpp/reverse_geocoder/library/pool_allocator.cpp
new file mode 100644
index 0000000000..0d841f7db0
--- /dev/null
+++ b/library/cpp/reverse_geocoder/library/pool_allocator.cpp
@@ -0,0 +1,17 @@
+#include "memory.h"
+#include "pool_allocator.h"
+
+#include <util/generic/yexception.h>
+
+using namespace NReverseGeocoder;
+
+// Allocates a poolSize-byte buffer owned by this object and passes it to Setup().
+NReverseGeocoder::TPoolAllocator::TPoolAllocator(size_t poolSize)
+    : Ptr_(new char[poolSize]), Size_(poolSize) {
+    Setup(Ptr_, Size_);
+}
+
+// Releases the buffer; delete[] on a null pointer is a no-op, so no check is needed.
+NReverseGeocoder::TPoolAllocator::~TPoolAllocator() {
+    delete[] Ptr_;
+}
diff --git a/library/cpp/reverse_geocoder/library/pool_allocator.h b/library/cpp/reverse_geocoder/library/pool_allocator.h
new file mode 100644
index 0000000000..f98bbcd3c1
--- /dev/null
+++ b/library/cpp/reverse_geocoder/library/pool_allocator.h
@@ -0,0 +1,42 @@
+#pragma once
+
+#include "block_allocator.h"
+
+#include <util/generic/utility.h>
+#include <util/generic/noncopyable.h>
+
+namespace NReverseGeocoder {
+    // TBlockAllocator whose backing buffer (Ptr_, Size_ bytes) is owned by this object.
+    class TPoolAllocator: public TBlockAllocator {
+    public:
+        TPoolAllocator()
+            : Ptr_(nullptr)
+            , Size_(0)
+        {}
+
+        // Takes over other's buffer; other is left owning nothing.
+        TPoolAllocator(TPoolAllocator&& other)
+            : TBlockAllocator(static_cast<TBlockAllocator&&>(other))
+            , Ptr_(other.Ptr_)
+            , Size_(other.Size_)
+        {
+            other.Ptr_ = nullptr;
+            other.Size_ = 0;
+        }
+
+        // Swaps buffers with other, so the buffer held before the call ends up in other.
+        TPoolAllocator& operator=(TPoolAllocator&& other) {
+            TBlockAllocator::operator=(static_cast<TBlockAllocator&&>(other));
+            DoSwap(Ptr_, other.Ptr_);
+            DoSwap(Size_, other.Size_);
+            return *this;
+        }
+
+        explicit TPoolAllocator(size_t poolSize);
+        ~TPoolAllocator() override;
+
+    private:
+        char* Ptr_;
+        size_t Size_;
+    };
+}
diff --git a/library/cpp/reverse_geocoder/library/system.h b/library/cpp/reverse_geocoder/library/system.h
new file mode 100644
index 0000000000..499fb2bd91
--- /dev/null
+++ b/library/cpp/reverse_geocoder/library/system.h
@@ -0,0 +1,3 @@
+#pragma once
+// Expands to htonl(337). NOTE(review): htonl is not declared by this header; the including file must provide it.
+#define SYSTEM_ENDIAN_FLAG (htonl(337))
diff --git a/library/cpp/reverse_geocoder/library/unaligned_iter.cpp b/library/cpp/reverse_geocoder/library/unaligned_iter.cpp
new file mode 100644
index 0000000000..0322b677dc
--- /dev/null
+++ b/library/cpp/reverse_geocoder/library/unaligned_iter.cpp
@@ -0,0 +1 @@
+#include "unaligned_iter.h"
diff --git a/library/cpp/reverse_geocoder/library/unaligned_iter.h b/library/cpp/reverse_geocoder/library/unaligned_iter.h
new file mode 100644
index 0000000000..827a3e2fd2
--- /dev/null
+++ b/library/cpp/reverse_geocoder/library/unaligned_iter.h
@@ -0,0 +1,64 @@
+#pragma once
+
+#include <util/system/unaligned_mem.h>
+#include <iterator>
+
+namespace NReverseGeocoder {
+    /**
+     * Random-access iterator over a read-only range of trivially copyable, possibly misaligned items.
+     * Dereferencing returns a copy, not a reference (keep sizeof(T) small); handy for LowerBound/UpperBound.
+     */
+    template <class T>
+    class TUnalignedIter {
+    public:
+        using TSelf = TUnalignedIter<T>;
+        // Same traits the std::iterator base (deprecated since C++17) used to provide.
+        using iterator_category = std::random_access_iterator_tag;
+        using value_type = T;
+        using difference_type = ptrdiff_t;
+        using pointer = T*;
+        using reference = T&;
+
+        explicit TUnalignedIter(const T* ptr)
+            : Ptr(ptr)
+        {}
+
+        T operator*() const {
+            return ReadUnaligned<T>(Ptr);
+        }
+
+        bool operator==(TSelf other) const {
+            return Ptr == other.Ptr;
+        }
+
+        bool operator<(TSelf other) const {
+            return Ptr < other.Ptr;
+        }
+
+        TSelf operator+(ptrdiff_t delta) const {
+            return TSelf{Ptr + delta};
+        }
+
+        ptrdiff_t operator-(TSelf other) const {
+            return Ptr - other.Ptr;
+        }
+
+        TSelf& operator+=(ptrdiff_t delta) {
+            Ptr += delta;
+            return *this;
+        }
+
+        TSelf& operator++() {
+            ++Ptr;
+            return *this;
+        }
+
+    private:
+        const T* Ptr;
+    };
+
+    template <class T>
+    TUnalignedIter<T> UnalignedIter(const T* ptr) {
+        return TUnalignedIter<T>(ptr);
+    }
+}
diff --git a/library/cpp/reverse_geocoder/library/ya.make b/library/cpp/reverse_geocoder/library/ya.make
new file mode 100644
index 0000000000..ec2eb205a8
--- /dev/null
+++ b/library/cpp/reverse_geocoder/library/ya.make
@@ -0,0 +1,11 @@
+LIBRARY()
+
+SRCS(
+ block_allocator.cpp
+ fs.cpp
+ log.cpp
+ pool_allocator.cpp
+ unaligned_iter.cpp
+)
+
+END()
diff --git a/library/cpp/reverse_geocoder/proto/CMakeLists.darwin-x86_64.txt b/library/cpp/reverse_geocoder/proto/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..8d1df0fdf8
--- /dev/null
+++ b/library/cpp/reverse_geocoder/proto/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,56 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(cpp-reverse_geocoder-proto)
+target_link_libraries(cpp-reverse_geocoder-proto PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(cpp-reverse_geocoder-proto PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/proto/geo_data.proto
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/proto/region.proto
+)
+target_proto_addincls(cpp-reverse_geocoder-proto
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(cpp-reverse_geocoder-proto
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/library/cpp/reverse_geocoder/proto/CMakeLists.linux-aarch64.txt b/library/cpp/reverse_geocoder/proto/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..b53c1692ee
--- /dev/null
+++ b/library/cpp/reverse_geocoder/proto/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,57 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(cpp-reverse_geocoder-proto)
+target_link_libraries(cpp-reverse_geocoder-proto PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(cpp-reverse_geocoder-proto PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/proto/geo_data.proto
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/proto/region.proto
+)
+target_proto_addincls(cpp-reverse_geocoder-proto
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(cpp-reverse_geocoder-proto
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/library/cpp/reverse_geocoder/proto/CMakeLists.linux-x86_64.txt b/library/cpp/reverse_geocoder/proto/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..b53c1692ee
--- /dev/null
+++ b/library/cpp/reverse_geocoder/proto/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,57 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(cpp-reverse_geocoder-proto)
+target_link_libraries(cpp-reverse_geocoder-proto PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(cpp-reverse_geocoder-proto PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/proto/geo_data.proto
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/proto/region.proto
+)
+target_proto_addincls(cpp-reverse_geocoder-proto
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(cpp-reverse_geocoder-proto
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/library/cpp/reverse_geocoder/proto/CMakeLists.txt b/library/cpp/reverse_geocoder/proto/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/library/cpp/reverse_geocoder/proto/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/library/cpp/reverse_geocoder/proto/CMakeLists.windows-x86_64.txt b/library/cpp/reverse_geocoder/proto/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..8d1df0fdf8
--- /dev/null
+++ b/library/cpp/reverse_geocoder/proto/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,56 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(cpp-reverse_geocoder-proto)
+target_link_libraries(cpp-reverse_geocoder-proto PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(cpp-reverse_geocoder-proto PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/proto/geo_data.proto
+ ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/proto/region.proto
+)
+target_proto_addincls(cpp-reverse_geocoder-proto
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(cpp-reverse_geocoder-proto
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/library/cpp/reverse_geocoder/proto/geo_data.proto b/library/cpp/reverse_geocoder/proto/geo_data.proto
new file mode 100644
index 0000000000..00ecb48bec
--- /dev/null
+++ b/library/cpp/reverse_geocoder/proto/geo_data.proto
@@ -0,0 +1,42 @@
+package NReverseGeocoder.NProto;
+
+message TGeoData {
+ required uint64 Magic = 1;
+ required uint64 Version = 2;
+ optional uint64 Points = 3;
+ optional uint64 PointsNumber = 4;
+ optional uint64 PointsCrc32 = 5;
+ optional uint64 Edges = 6;
+ optional uint64 EdgesNumber = 7;
+ optional uint64 EdgesCrc32 = 8;
+ optional uint64 EdgeRefs = 9;
+ optional uint64 EdgeRefsNumber = 10;
+ optional uint64 EdgeRefsCrc32 = 11;
+ optional uint64 Parts = 12;
+ optional uint64 PartsNumber = 13;
+ optional uint64 PartsCrc32 = 14;
+ optional uint64 Polygons = 15;
+ optional uint64 PolygonsNumber = 16;
+ optional uint64 PolygonsCrc32 = 17;
+ optional uint64 PolygonRefs = 18;
+ optional uint64 PolygonRefsNumber = 19;
+ optional uint64 PolygonRefsCrc32 = 20;
+ optional uint64 Boxes = 21;
+ optional uint64 BoxesNumber = 22;
+ optional uint64 BoxesCrc32 = 23;
+ optional uint64 Blobs = 24;
+ optional uint64 BlobsNumber = 25;
+ optional uint64 BlobsCrc32 = 26;
+ optional uint64 Kvs = 27;
+ optional uint64 KvsNumber = 28;
+ optional uint64 KvsCrc32 = 29;
+ optional uint64 Regions = 30;
+ optional uint64 RegionsNumber = 31;
+ optional uint64 RegionsCrc32 = 32;
+ optional uint64 RawPolygons = 33;
+ optional uint64 RawPolygonsNumber = 34;
+ optional uint64 RawPolygonsCrc32 = 35;
+ optional uint64 RawEdgeRefs = 36;
+ optional uint64 RawEdgeRefsNumber = 37;
+ optional uint64 RawEdgeRefsCrc32 = 38;
+};
diff --git a/library/cpp/reverse_geocoder/proto/region.proto b/library/cpp/reverse_geocoder/proto/region.proto
new file mode 100644
index 0000000000..b782331628
--- /dev/null
+++ b/library/cpp/reverse_geocoder/proto/region.proto
@@ -0,0 +1,32 @@
+package NReverseGeocoder.NProto;
+
+message TLocation {
+ required double Lat = 1;
+ required double Lon = 2;
+}
+
+message TPolygon {
+ required uint64 PolygonId = 1;
+ repeated TLocation Locations = 2;
+
+ enum EType {
+ TYPE_UNKNOWN = 0;
+ TYPE_INNER = 1;
+ TYPE_OUTER = 2;
+ }
+
+ required EType Type = 3;
+}
+
+message TKv {
+ required string K = 1;
+ required string V = 2;
+}
+
+message TRegion {
+ required uint64 RegionId = 1;
+ optional uint64 ParentId = 2;
+ repeated TPolygon Polygons = 3;
+ repeated TKv Kvs = 4;
+ repeated string Blobs = 5;
+}
diff --git a/library/cpp/reverse_geocoder/proto/ya.make b/library/cpp/reverse_geocoder/proto/ya.make
new file mode 100644
index 0000000000..b6f7156210
--- /dev/null
+++ b/library/cpp/reverse_geocoder/proto/ya.make
@@ -0,0 +1,10 @@
+PROTO_LIBRARY()
+
+SRCS(
+ geo_data.proto
+ region.proto
+)
+
+EXCLUDE_TAGS(GO_PROTO)
+
+END()
diff --git a/library/cpp/robots_txt/CMakeLists.darwin-x86_64.txt b/library/cpp/robots_txt/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..408bf12f04
--- /dev/null
+++ b/library/cpp/robots_txt/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,26 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(robotstxtcfg)
+
+add_library(library-cpp-robots_txt)
+target_link_libraries(library-cpp-robots_txt PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ cpp-robots_txt-robotstxtcfg
+ library-cpp-case_insensitive_string
+ library-cpp-charset
+ cpp-string_utils-url
+ library-cpp-uri
+)
+target_sources(library-cpp-robots_txt PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/prefix_tree.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/prefix_tree_rules_handler.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robots_txt_parser.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/rules_handler.cpp
+)
diff --git a/library/cpp/robots_txt/CMakeLists.linux-aarch64.txt b/library/cpp/robots_txt/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..73a209cbbe
--- /dev/null
+++ b/library/cpp/robots_txt/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,27 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(robotstxtcfg)
+
+add_library(library-cpp-robots_txt)
+target_link_libraries(library-cpp-robots_txt PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ cpp-robots_txt-robotstxtcfg
+ library-cpp-case_insensitive_string
+ library-cpp-charset
+ cpp-string_utils-url
+ library-cpp-uri
+)
+target_sources(library-cpp-robots_txt PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/prefix_tree.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/prefix_tree_rules_handler.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robots_txt_parser.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/rules_handler.cpp
+)
diff --git a/library/cpp/robots_txt/CMakeLists.linux-x86_64.txt b/library/cpp/robots_txt/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..73a209cbbe
--- /dev/null
+++ b/library/cpp/robots_txt/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,27 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(robotstxtcfg)
+
+add_library(library-cpp-robots_txt)
+target_link_libraries(library-cpp-robots_txt PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ cpp-robots_txt-robotstxtcfg
+ library-cpp-case_insensitive_string
+ library-cpp-charset
+ cpp-string_utils-url
+ library-cpp-uri
+)
+target_sources(library-cpp-robots_txt PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/prefix_tree.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/prefix_tree_rules_handler.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robots_txt_parser.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/rules_handler.cpp
+)
diff --git a/library/cpp/robots_txt/CMakeLists.txt b/library/cpp/robots_txt/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/library/cpp/robots_txt/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/library/cpp/robots_txt/CMakeLists.windows-x86_64.txt b/library/cpp/robots_txt/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..408bf12f04
--- /dev/null
+++ b/library/cpp/robots_txt/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,26 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(robotstxtcfg)
+
+add_library(library-cpp-robots_txt)
+target_link_libraries(library-cpp-robots_txt PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ cpp-robots_txt-robotstxtcfg
+ library-cpp-case_insensitive_string
+ library-cpp-charset
+ cpp-string_utils-url
+ library-cpp-uri
+)
+target_sources(library-cpp-robots_txt PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/prefix_tree.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/prefix_tree_rules_handler.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robots_txt_parser.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/rules_handler.cpp
+)
diff --git a/library/cpp/robots_txt/constants.h b/library/cpp/robots_txt/constants.h
new file mode 100644
index 0000000000..e5e2a57e18
--- /dev/null
+++ b/library/cpp/robots_txt/constants.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <util/generic/size_literals.h>
+#include <util/system/defaults.h>
+
+
+constexpr auto robots_max = 500_KB;
+constexpr auto max_rules_count = 10'000;
+constexpr auto max_rule_length = 10_KB;
diff --git a/library/cpp/robots_txt/prefix_tree.cpp b/library/cpp/robots_txt/prefix_tree.cpp
new file mode 100644
index 0000000000..f7b1848a43
--- /dev/null
+++ b/library/cpp/robots_txt/prefix_tree.cpp
@@ -0,0 +1,172 @@
+#include <cstring>
+#include <algorithm>
+
+#include "prefix_tree.h"
+
+TPrefixTreeNodeElement::TPrefixTreeNodeElement()
+ : Key(nullptr) // empty element: no key text
+ , KeyLen(0)
+ , Val(-1) // -1 is the value the tree code tests for "no child node"
+ , Index(-1)
+{
+}
+
+TPrefixTreeNodeElement::TPrefixTreeNodeElement(const char* key, i32 keyLen = 0, i32 val = -1, i32 index = -1)
+ : Key(key) // the pointer is stored as given; the key text is not copied
+ , KeyLen(keyLen)
+ , Val(val)
+ , Index(index)
+{ // NOTE(review): the default arguments exist only on this definition, not on the header declaration
+}
+
+TPrefixTreeNode::TPrefixTreeNode()
+ : Elements() // a new node starts with no elements
+{
+}
+
+int TPrefixTreeNode::Find(char ch) const { // position of the element whose key starts with ch, or -1
+    size_t pos = 0;
+    while (pos < Elements.size() && *Elements[pos].Key != ch)
+        ++pos;
+    return pos < Elements.size() ? static_cast<int>(pos) : -1;
+}
+
+// Stores (key, keyLen, val, index), overwriting the element whose key starts with the same character.
+void TPrefixTreeNode::Set(const char* key, i32 keyLen, i32 val, i32 index) {
+    const int pos = Find(*key);
+    if (pos >= 0)
+        Elements[pos] = TPrefixTreeNodeElement(key, keyLen, val, index);
+    else
+        Elements.push_back(TPrefixTreeNodeElement(key, keyLen, val, index));
+}
+
+void TPrefixTreeNode::Dump(FILE* logFile) const {
+    // Writes the element count, then one line per element; a null logFile means stderr.
+    if (!logFile)
+        logFile = stderr;
+    fprintf(logFile, "size=%" PRISZT "\n", Elements.size());
+    for (const TPrefixTreeNodeElement& element : Elements) {
+        // "%.*s" prints at most KeyLen bytes of Key directly, so keys longer than the former
+        // 1234-byte static scratch buffer can no longer overflow it.
+        fprintf(logFile, "{key=[%.*s]:%d, val=%d, index=%d}\n", element.KeyLen, element.Key, element.KeyLen, element.Val, element.Index);
+    }
+}
+
+void TPrefixTree::Dump(FILE* logFile) const {
+    // Prints the node count, then every node prefixed with its position; null means stderr.
+    FILE* const out = logFile ? logFile : stderr;
+    fprintf(out, "%" PRISZT " nodes\n", Nodes.size());
+    for (size_t nodeNo = 0; nodeNo != Nodes.size(); ++nodeNo) {
+        fprintf(out, "%" PRISZT ": ", nodeNo);
+        Nodes[nodeNo].Dump(out);
+        fprintf(out, "\n");
+    }
+}
+
+TPrefixTree::TPrefixTree(int maxSize) {
+ Init(maxSize); // reserves storage for maxSize + 1 nodes and creates the root node
+}
+
+void TPrefixTree::Init(int maxSize) { // resets the tree to a single empty root node
+    Nodes.clear();
+    Nodes.reserve(maxSize > 0 ? static_cast<size_t>(maxSize) + 1 : 1); // in size_t: no int overflow at INT_MAX
+    Nodes.push_back(TPrefixTreeNode()); // Nodes[0] is the root used by Add() and MinPrefixIndex()
+}
+
+void TPrefixTree::Clear() {
+    // Init() empties Nodes itself before re-creating the root, so one call does everything.
+    Init(0);
+}
+
+void TPrefixTree::Add(const char* s, i32 index) {
+ AddInternal(s, Nodes[0], index); // insertion starts at the root; s is stored by pointer, not copied
+}
+
+void TPrefixTree::AddInternal(const char* s, TPrefixTreeNode& node, i32 index) {
+    if (!s || !*s)
+        return;
+
+    const int i = node.Find(*s);
+    if (i < 0) {
+        // No key in this node starts with *s: store the rest of s as a new element without a child.
+        node.Set(s, strlen(s), -1, index);
+        return;
+    }
+
+    // Nodes.push_back() below may reallocate Nodes, so work on a copy of the element and
+    // re-fetch it by position afterwards instead of keeping references across the call.
+    const size_t nodeIdx = &node - Nodes.data();
+    const TPrefixTreeNodeElement old = node.Elements[i];
+    const char* p = old.Key;
+    while (*s && (p - old.Key) < old.KeyLen && *s == *p)
+        ++s, ++p;
+    const i32 matched = p - old.Key;
+
+    if (matched < old.KeyLen) {
+        // Only part of the key matched: move its tail (and the rest of s, if any) to a new node.
+        Nodes.push_back(TPrefixTreeNode());
+        Nodes.back().Set(p, old.KeyLen - matched, old.Val, old.Index);
+        if (*s)
+            Nodes.back().Set(s, strlen(s), -1, index);
+        TPrefixTreeNodeElement& d = Nodes[nodeIdx].Elements[i];
+        d.Val = Nodes.size() - 1;
+        d.KeyLen = matched;
+        d.Index = *s ? INDEX_BOUND : index; // INDEX_BOUND: no inserted string ends at this element
+    } else if (*s) {
+        // Whole key matched and s continues: descend only when a child exists and index is smaller.
+        if (old.Val != -1 && index < old.Index)
+            AddInternal(s, Nodes[old.Val], index);
+    } else {
+        // s ends exactly at the end of this key: keep the smaller index.
+        node.Elements[i].Index = std::min(old.Index, index);
+    }
+}
+
+int TPrefixTree::GetMemorySize() const { // bytes reserved by the node array plus every element array
+    int bytes = Nodes.capacity() * sizeof(TPrefixTreeNode);
+    for (const TPrefixTreeNode& node : Nodes)
+        bytes += node.Elements.capacity() * sizeof(TPrefixTreeNodeElement);
+    return bytes;
+}
+
+void TPrefixTree::Compress() { // asks every vector in the tree to drop its unused capacity
+    Nodes.shrink_to_fit();
+    for (TPrefixTreeNode& node : Nodes)
+        node.Elements.shrink_to_fit();
+}
+
+i32 TPrefixTree::MinPrefixIndex(const char* s) const { // smallest index of a stored string that is a prefix of s, or -1
+    if (!s || !*s) // a null string is treated like an empty one, as AddInternal() does
+        return -1;
+    int i = Nodes[0].Find(*s);
+    if (i < 0)
+        return -1;
+    const TPrefixTreeNodeElement* d = &Nodes[0].Elements[i];
+    // p walks the key of the current element while s walks the query string.
+    const char* p = d->Key;
+    if (!p || !*p)
+        return -1;
+    // Smallest Index seen on elements whose whole key has been matched so far.
+    i32 result = INDEX_BOUND;
+    i32 nodeIndex = 0;
+    while (*s == *p) {
+        if (++p - d->Key >= d->KeyLen)
+            result = std::min(result, d->Index);
+        if (!*++s)
+            break;
+        // Key exhausted but s continues: follow the child node, if there is one.
+        if (p - d->Key >= d->KeyLen) {
+            nodeIndex = d->Val;
+            if (nodeIndex == -1)
+                break;
+            i = Nodes[nodeIndex].Find(*s);
+            if (i < 0)
+                break;
+            d = &Nodes[nodeIndex].Elements[i];
+            p = d->Key;
+            if (!p || !*p)
+                break;
+        }
+    }
+    return result < INDEX_BOUND ? result : -1;
+}
diff --git a/library/cpp/robots_txt/prefix_tree.h b/library/cpp/robots_txt/prefix_tree.h
new file mode 100644
index 0000000000..5feafcb74d
--- /dev/null
+++ b/library/cpp/robots_txt/prefix_tree.h
@@ -0,0 +1,47 @@
+#pragma once
+
+#include <util/generic/ptr.h>
+#include <util/generic/vector.h>
+#include <cstdio>
+#include <util/generic/noncopyable.h>
+
+struct TPrefixTreeNodeElement { // one keyed entry of a TPrefixTreeNode
+ const char* Key; // start of the key text; the pointer is stored as given, the text is not copied
+ i32 KeyLen; // number of characters of Key that belong to this entry
+ i32 Val; // position of the child node in TPrefixTree::Nodes, or -1 when there is none
+ i32 Index; // smallest index passed to TPrefixTree::Add for a string ending here (INDEX_BOUND if none)
+
+ TPrefixTreeNodeElement(); // null key, zero length, Val = -1, Index = -1
+ TPrefixTreeNodeElement(const char*, i32, i32, i32); // (key, keyLen, val, index)
+};
+
+class TPrefixTreeNode { // a tree node: a flat list of elements looked up by the first character of their key
+public:
+ TVector<TPrefixTreeNodeElement> Elements;
+ TPrefixTreeNode();
+
+ int Find(char) const; // position of the element whose key starts with the character, or -1
+ void Set(const char*, i32, i32, i32); // (key, keyLen, val, index): replaces the element with the same first character or appends
+ void Dump(FILE*) const; // prints the elements; a null FILE* means stderr
+};
+
+class TPrefixTree : TNonCopyable { // prefix tree over C strings; each stored string carries an i32 index
+private:
+ static const i32 INDEX_BOUND = 1 << 30; // Index value meaning "no stored string ends here"
+
+ TVector<TPrefixTreeNode> Nodes; // Nodes[0] is the root; elements refer to children by position in this vector
+
+public:
+ void Init(int); // (maxSize): drops all nodes, reserves room for maxSize + 1 and creates the root
+ TPrefixTree(int); // (maxSize): calls Init(maxSize)
+
+ void Add(const char*, i32); // (string, index): the string is referenced by pointer, not copied
+ i32 MinPrefixIndex(const char*) const; // smallest index of a stored string that is a prefix of the argument, or -1
+ void Clear(); // back to a single empty root node
+ void Dump(FILE*) const; // prints every node; a null FILE* means stderr
+ int GetMemorySize() const; // bytes reserved by the node and element vectors
+ void Compress(); // shrink_to_fit on every vector
+
+private:
+ void AddInternal(const char*, TPrefixTreeNode&, i32); // (string, node to insert under, index)
+};
diff --git a/library/cpp/robots_txt/prefix_tree_rules_handler.cpp b/library/cpp/robots_txt/prefix_tree_rules_handler.cpp
new file mode 100644
index 0000000000..8dd579d060
--- /dev/null
+++ b/library/cpp/robots_txt/prefix_tree_rules_handler.cpp
@@ -0,0 +1,706 @@
+#include "robots_txt.h"
+
+#include <util/digest/fnv.h>
+#include <util/system/tls.h>
+#include <util/generic/buffer.h>
+#include <util/generic/yexception.h>
+
+namespace {
+
+// Brings an Allow/Disallow pattern to a canonical form:
+//  - a lone "*" becomes "/*";
+//  - every run of consecutive '*' is collapsed into a single '*';
+//  - a trailing '$' is dropped, otherwise the pattern is made to end with '*'.
+TString NormalizeRule(TStringBuf rule) {
+    if (rule == "*") {
+        return TString("/*");
+    }
+
+    TString normalized;
+    normalized.reserve(rule.size() + 1);
+
+    for (const char ch : rule) {
+        // Skip a '*' that directly follows another '*'.
+        if (ch == '*' && normalized.EndsWith('*')) {
+            continue;
+        }
+        normalized.append(ch);
+    }
+
+    if (normalized.EndsWith('$')) {
+        normalized.pop_back();
+        return normalized;
+    }
+
+    if (!normalized.EndsWith('*')) {
+        normalized.append('*');
+    }
+    return normalized;
+}
+
+// A prefix rule ends with '*' and contains no other '*'.
+bool IsPrefixRule(TStringBuf rule) {
+    if (!rule.EndsWith('*')) {
+        return false;
+    }
+    const TStringBuf withoutLastChar(rule.begin(), rule.end() - 1);
+    return !withoutLastChar.Contains('*');
+}
+
+// Converts a directive into the (text, type) pair stored in the rules buffer.
+//  - 'H', 'S', 'C' and 'P' directives are stored verbatim.
+//  - 'A' / 'D' rules are normalized first. A prefix rule loses its trailing '*'
+//    and keeps the uppercase type: ("/foo", 'D') -> ("/foo", 'D').
+//    Any other rule keeps the normalized pattern and gets a lowercase type:
+//    ("/*foo", 'D') -> ("/*foo*", 'd'), ("/*foo$", 'A') -> ("/*foo", 'a').
+//  - Any other type produces an empty text with the type unchanged.
+std::pair<TString, char> ConvertRule(TStringBuf rule, char type) {
+    const bool isPathRule = (type == 'A' || type == 'D');
+    if (!isPathRule) {
+        const bool storedVerbatim = (type == 'H' || type == 'S' || type == 'C' || type == 'P');
+        if (storedVerbatim) {
+            return {TString(rule), type};
+        }
+        return {{}, type};
+    }
+
+    auto converted = NormalizeRule(rule);
+    if (IsPrefixRule(converted)) {
+        // The trailing '*' is implied for prefix rules, so it is not stored.
+        converted.pop_back();
+    } else {
+        // Lowercase type marks a rule that needs full wildcard matching.
+        type = tolower(type);
+    }
+
+    return {std::move(converted), type};
+}
+
+} // namespace
+
+// Constructs a handler from a ready TBotIdSet; all arguments are forwarded to the base class.
+TPrefixTreeRobotsTxtRulesHandler::TPrefixTreeRobotsTxtRulesHandler(
+ TBotIdSet supportedBotIds,
+ int robotsMaxSize,
+ int maxRulesNumber,
+ bool saveDataForAnyBot)
+ : TRobotsTxtRulesHandlerBase(supportedBotIds, robotsMaxSize, maxRulesNumber, saveDataForAnyBot)
+{}
+
+// Same as above, but the bot ids come from a braced list that is first wrapped into a TBotIdSet.
+TPrefixTreeRobotsTxtRulesHandler::TPrefixTreeRobotsTxtRulesHandler(
+ std::initializer_list<ui32> supportedBotIds,
+ int robotsMaxSize,
+ int maxRulesNumber,
+ bool saveDataForAnyBot)
+ : TRobotsTxtRulesHandlerBase(TBotIdSet(supportedBotIds), robotsMaxSize, maxRulesNumber, saveDataForAnyBot)
+{}
+
+// Same as above, but the bot ids come from a TSet<ui32>, handled by the matching base-class constructor.
+TPrefixTreeRobotsTxtRulesHandler::TPrefixTreeRobotsTxtRulesHandler(
+ const TSet<ui32>& supportedBotIds,
+ int robotsMaxSize,
+ int maxRulesNumber,
+ bool saveDataForAnyBot)
+ : TRobotsTxtRulesHandlerBase(supportedBotIds, robotsMaxSize, maxRulesNumber, saveDataForAnyBot)
+{}
+
+// Reports whether nothing is stored for the bot: either it has no info at all,
+// or its buffer has not grown past the leading size prefix.
+bool TPrefixTreeRobotsTxtRulesHandler::Empty(const ui32 botId) const {
+    const auto storedBotId = GetNotOptimizedBotId(botId);
+    const auto& info = BotIdToPrefixTreeBotInfo[storedBotId];
+    if (!info) {
+        return true;
+    }
+    return info->BufferPosition <= sizeof(info->BufferPosition);
+}
+
+// Returns an iterator over the bot's stored entries, i.e. the buffer contents
+// between the leading size prefix and the current write position.
+// A default-constructed iterator is returned when the bot has no info.
+TRobotsTxtRulesIterator TPrefixTreeRobotsTxtRulesHandler::GetRulesIterator(const ui32 botId) const {
+    const auto& info = BotIdToPrefixTreeBotInfo[GetNotOptimizedBotId(botId)];
+    if (info) {
+        const char* const bufferStart = info->Buffer.Get();
+        const char* const entriesBegin = bufferStart + sizeof(info->BufferPosition);
+        const char* const entriesEnd = bufferStart + info->BufferPosition;
+        return TRobotsTxtRulesIterator(entriesBegin, entriesEnd);
+    }
+    return {};
+}
+
+// Estimates the memory used by all bots: prefix tree, rule buffer, both
+// pointer tables, plus a fixed 256-byte allowance per bot that has info.
+size_t TPrefixTreeRobotsTxtRulesHandler::GetMemorySize() {
+    size_t total = 0;
+    for (const auto& info : BotIdToPrefixTreeBotInfo) {
+        if (info) {
+            const size_t treeBytes = info->PrefixRules.GetMemorySize();
+            const size_t bufferBytes = info->BufferSize * sizeof(char);
+            const size_t complexTableBytes = info->ComplexRulesSize * sizeof(char**);
+            const size_t rulesTableBytes = info->RulesSize * sizeof(char*);
+            total += treeBytes + bufferBytes + complexTableBytes + rulesTableBytes + (1 << 8);
+        }
+    }
+    return total;
+}
+
+// Drops the prefix-tree info of one bot and then lets the base class clear its
+// own per-bot state. Bot ids outside the table are ignored.
+void TPrefixTreeRobotsTxtRulesHandler::ClearInternal(const ui32 botId) {
+    if (botId < BotIdToPrefixTreeBotInfo.size()) {
+        BotIdToPrefixTreeBotInfo[botId].Reset();
+        TRobotsTxtRulesHandlerBase::ClearInternal(botId);
+    }
+}
+
+// Deduplicates per-bot rule buffers: bots whose buffers hash to the same value
+// share one stored copy through OptimizedBotIdToStoredBotId, and bots with no
+// stored entries are dropped from LoadedBotIds.
+// Returns false if the total limits are exceeded afterwards, in which case
+// everything is replaced by "allow all"; returns true otherwise.
+bool TPrefixTreeRobotsTxtRulesHandler::OptimizeSize() {
+    ResetOptimized();
+
+    TMap<ui64, ui32> hashToBotId;
+    for (auto botId : LoadedBotIds) {
+        auto& botInfo = BotIdToPrefixTreeBotInfo[botId];
+        // A bot id can be marked as loaded while its info holder is empty
+        // (DoAllowAll/DoDisallowAll clear the info and then insert the id),
+        // so check the holder before dereferencing it.
+        if (!botInfo || botInfo->BufferPosition <= sizeof(ui32)) {
+            botInfo.Reset();
+            LoadedBotIds.remove(botId);
+            continue;
+        }
+
+        ui64 hash = FnvHash<ui64>(botInfo->Buffer.Get(), botInfo->BufferPosition);
+        if (auto p = hashToBotId.FindPtr(hash)) {
+            // Same buffer hash as an earlier bot: point to its copy and free ours.
+            OptimizedBotIdToStoredBotId[botId] = *p;
+            ClearInternal(botId);
+            botInfo.Reset();
+        } else {
+            hashToBotId[hash] = botId;
+        }
+    }
+
+    if (IsFullTotal()) {
+        DoAllowAll();
+        return false;
+    }
+
+    return true;
+}
+
+// Clears the per-bot data of every supported bot, then the shared base-class state.
+void TPrefixTreeRobotsTxtRulesHandler::Clear() {
+    for (size_t id = 0; id < robotstxtcfg::max_botid; ++id) {
+        if (!IsBotIdSupported(id)) {
+            continue;
+        }
+        ClearInternal(id);
+    }
+    TRobotsTxtRulesHandlerBase::Clear();
+}
+
+// Replaces the bot's buffer with a freshly allocated one of newSize bytes,
+// copying over as much of the old contents as fits.
+void TPrefixTreeRobotsTxtRulesHandler::ResizeBuffer(const ui32 botId, int newSize) {
+    auto& info = GetInfo(botId);
+    const int bytesToKeep = std::min(info.BufferSize, newSize);
+
+    TArrayHolder<char> resized(new char[newSize]);
+    memcpy(resized.Get(), info.Buffer.Get(), bytesToKeep);
+
+    // After the swap `resized` owns the old storage and frees it on scope exit.
+    info.Buffer.Swap(resized);
+    info.BufferSize = newSize;
+}
+
+// Appends one directive to the bot's buffer as: type byte, converted rule text, '\0'.
+// Empty rules and rules containing '\0' are ignored (the call still returns true).
+// Returns false when IsFull() reports that limits are exceeded; in that case
+// everything is replaced by "allow all".
+bool TPrefixTreeRobotsTxtRulesHandler::AddRule(const ui32 botId, TStringBuf rule, char type) {
+ if (rule.empty() || rule.Contains('\0')) {
+ return true;
+ }
+
+ auto& botInfo = GetInfo(botId);
+
+ if (IsFull(botId, rule.size())) {
+ DoAllowAll();
+ return false;
+ }
+
+ auto [convertedRule, convertedType] = ConvertRule(rule, type);
+ const auto len = convertedRule.size() + 2; // 1 byte for convertedType and another for '\0'
+
+ // Grow the buffer by doubling until the new entry fits.
+ if (auto newPos = botInfo.BufferPosition + len; newPos >= size_t(botInfo.BufferSize)) {
+ size_t newSize = botInfo.BufferSize;
+ while (newPos >= newSize)
+ newSize *= 2;
+ ResizeBuffer(botId, newSize);
+ }
+
+ // strcpy also writes the terminating '\0' that `len` accounts for.
+ auto out = botInfo.Buffer.Get() + botInfo.BufferPosition;
+ *out++ = convertedType;
+ strcpy(out, convertedRule.data());
+ botInfo.BufferPosition += len;
+
+ // Only Allow/Disallow directives are counted as rules; the check uses the
+ // caller's type, not the possibly lowercased convertedType.
+ if (type == 'A' || type == 'D') {
+ botInfo.RulesPosition++;
+ }
+
+ return true;
+}
+
+// Finds the lowest-index rule in the bot's Rules table that matches `s`, looking
+// at both the prefix tree and the complex (wildcard) rules. Returns that rule's
+// text if its type byte, uppercased, equals `type`; otherwise nullptr.
+// Also returns nullptr when the bot has no info or nothing matches.
+const char* TPrefixTreeRobotsTxtRulesHandler::GetRule(const ui32 botId, const char* s, char type) const {
+ const auto& botInfo = BotIdToPrefixTreeBotInfo[GetNotOptimizedBotId(botId)];
+ if (!botInfo) {
+ return nullptr;
+ }
+
+ // m is the best (smallest) matching index so far; it starts past RulesPosition,
+ // which the final check treats as "no match".
+ int m = botInfo->RulesPosition + 1;
+ int k = botInfo->PrefixRules.MinPrefixIndex(s);
+ if (k >= 0)
+ m = k;
+ char* rule;
+ int j;
+ for (int i = 0; i < botInfo->ComplexRulesPosition; ++i) {
+ rule = *botInfo->ComplexRules.Get()[i];
+ // ComplexRules entries point into the Rules table, so the pointer difference is the rule's index.
+ j = botInfo->ComplexRules.Get()[i] - botInfo->Rules.Get();
+ // Complex rules are stored in increasing index order, so none of the remaining ones can beat m.
+ if (j >= m)
+ break;
+ if (CheckRule(s, rule)) {
+ m = j;
+ break;
+ }
+ }
+ if (m >= botInfo->RulesPosition)
+ return nullptr;
+ // The type byte is stored immediately before the rule text.
+ return toupper(*(botInfo->Rules.Get()[m] - 1)) == type ? botInfo->Rules.Get()[m] : nullptr;
+}
+
+// Reports whether the rules that apply to the bot allow everything. When the
+// bot has no mapping of its own, the "any bot" info is consulted instead.
+inline bool TPrefixTreeRobotsTxtRulesHandler::IsAllowAll(const ui32 botId) const {
+    const auto mapped = GetMappedBotId(botId, false);
+    const auto effectiveId = mapped ? *mapped : robotstxtcfg::id_anybot;
+    auto& info = BotIdToPrefixTreeBotInfo[effectiveId];
+    if (!info) {
+        return false;
+    }
+    return info->AllowAll;
+}
+
+// Reports whether every supported Yandex bot is allowed everything.
+inline bool TPrefixTreeRobotsTxtRulesHandler::IsAllowAll() const {
+    for (ui32 botId = 0; botId < robotstxtcfg::max_botid; ++botId) {
+        const bool relevant = robotstxtcfg::IsYandexBotId(botId) && IsBotIdSupported(botId);
+        if (relevant && !IsAllowAll(botId)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Reports whether the rules that apply to the bot disallow everything.
+// When the bot has no mapping of its own, the "any bot" info is used only if
+// useAny is set; otherwise the answer is false.
+inline bool TPrefixTreeRobotsTxtRulesHandler::IsDisallowAll(const ui32 botId, bool useAny) const {
+    const auto mapped = GetMappedBotId(botId, false);
+    if (!mapped && !useAny) {
+        return false;
+    }
+
+    const auto effectiveId = mapped ? *mapped : robotstxtcfg::id_anybot;
+    const auto& info = BotIdToPrefixTreeBotInfo[effectiveId];
+    if (!info) {
+        return false;
+    }
+    return info->DisallowAll;
+}
+
+// Reports whether every supported Yandex bot is disallowed everything.
+inline bool TPrefixTreeRobotsTxtRulesHandler::IsDisallowAll() const {
+    for (ui32 botId = 0; botId < robotstxtcfg::max_botid; ++botId) {
+        const bool relevant = robotstxtcfg::IsYandexBotId(botId) && IsBotIdSupported(botId);
+        if (relevant && !IsDisallowAll(botId)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Replaces all stored rules with a single "Allow: /" rule on the "any bot" entry
+// and maps every supported bot to it.
+void TPrefixTreeRobotsTxtRulesHandler::DoAllowAll() {
+ using robotstxtcfg::id_anybot;
+
+ // Drop all bots to default
+ SupportedBotIds.insert(id_anybot);
+ for (ui32 botId = 0; botId < robotstxtcfg::max_botid; ++botId) {
+ if (IsBotIdSupported(botId)) {
+ ClearInternal(botId);
+ OptimizedBotIdToStoredBotId[botId] = id_anybot;
+ LoadedBotIds.insert(botId);
+ }
+ }
+
+ // Initialize anybot with "allow all" rule
+ AddRule(id_anybot, "/", 'A');
+ GetInfo(id_anybot).AllowAll = true;
+ // Re-add sitemaps, clean-params and the host directive, which were cleared from the buffer above.
+ SaveRulesToBuffer();
+}
+
+// Replaces all stored rules of the supported bots with "disallow all": the
+// "any bot" entry (if supported) gets a single "Disallow: /" rule and every
+// other supported bot is mapped to it.
+void TPrefixTreeRobotsTxtRulesHandler::DoDisallowAll() {
+ for (ui32 botId = 0; botId < robotstxtcfg::max_botid; ++botId) {
+ if (!IsBotIdSupported(botId))
+ continue;
+ ClearInternal(botId);
+ if (botId == robotstxtcfg::id_anybot) {
+ auto& botInfo = GetInfo(botId);
+ AddRule(botId, "/", 'D');
+ botInfo.DisallowAll = true;
+ // Re-add sitemaps, clean-params and the host directive cleared above.
+ SaveRulesToBuffer();
+ } else {
+ OptimizedBotIdToStoredBotId[botId] = robotstxtcfg::id_anybot;
+ }
+ LoadedBotIds.insert(botId);
+ }
+}
+
+// Returns the Disallow rule that forbids `s` for the bot, or nullptr when the
+// URL is not disallowed (including when no rules are mapped to the bot).
+// For a "disallow all" bot the match-all rule found by FindRuleAll is returned,
+// falling back to the first rule when none is found.
+const char* TPrefixTreeRobotsTxtRulesHandler::IsDisallow(const ui32 botId, const char* s, bool useAny) const {
+    const auto mapped = GetMappedBotId(botId, useAny);
+    if (!mapped) {
+        return nullptr;
+    }
+    const auto storedId = *mapped;
+
+    const auto& info = BotIdToPrefixTreeBotInfo[storedId];
+    if (!info || !IsDisallowAll(storedId, useAny)) {
+        return GetRule(storedId, s, 'D');
+    }
+
+    // FindRuleAll is a non-const member, hence the const_cast.
+    auto* self = const_cast<TPrefixTreeRobotsTxtRulesHandler*>(this);
+    const int found = self->FindRuleAll(*info, 'D');
+    return info->Rules.Get()[found < 0 ? 0 : found];
+}
+
+// Returns the rule that allows `s` for the bot: the matching Allow rule if one
+// wins, otherwise "/" when no Disallow rule wins either. Returns nullptr when
+// the URL is disallowed.
+const char* TPrefixTreeRobotsTxtRulesHandler::IsAllow(const ui32 botId, const char* s) const {
+    const auto id = GetMappedBotId(botId, true);
+    if (!id) {
+        // GetMappedBotId returns a TMaybe; with no rules mapped to the bot nothing
+        // forbids the URL, so report the default "allowed" answer instead of
+        // dereferencing an empty value (IsDisallow guards the same way).
+        return "/";
+    }
+
+    if (auto p = GetRule(*id, s, 'A'))
+        return p;
+    return GetRule(*id, s, 'D') ? nullptr : "/";
+}
+
+// Counts the characters of a NUL-terminated string that are not '*'.
+int TPrefixTreeRobotsTxtRulesHandler::StrLenWithoutStars(const char* s) {
+    int count = 0;
+    for (const char* cursor = s; *cursor; ++cursor) {
+        count += (*cursor != '*') ? 1 : 0;
+    }
+    return count;
+}
+
+// Walks the bot's buffer entry by entry; entries start with a type byte and end
+// with '\n' or '\0'. Returns the number of entries whose type is not H/S/C/P,
+// i.e. the Allow/Disallow rules.
+//  - countRules < 0: counting pass only, nothing is stored (ruleInfos is unused).
+//  - countRules >= 0: allocates Rules with countRules slots, fills Rules and
+//    *ruleInfos for each rule, and applies H/S/C/P entries to HostDirective,
+//    SiteMaps, the bot's crawl delay and CleanParams.
+int TPrefixTreeRobotsTxtRulesHandler::TraceBuffer(const ui32 botId, int countRules, const TArrayHolder<TRuleInfo>* ruleInfos) {
+ CheckBotIdValidity(botId);
+ auto& prefixBotInfo = GetInfo(botId);
+ TBotInfo& botInfo = BotIdToInfo[botId];
+
+ bool store = countRules >= 0;
+ if (store) {
+ prefixBotInfo.Rules.Reset(new char*[prefixBotInfo.RulesSize = countRules]);
+ }
+
+ // beg is the offset of the current entry's type byte, or -1 between entries.
+ int beg = -1, n = 0;
+ // Stamp the leading size prefix of the buffer with BufferSize.
+ *((int*)prefixBotInfo.Buffer.Get()) = prefixBotInfo.BufferSize;
+ for (size_t i = sizeof(prefixBotInfo.BufferPosition); i < prefixBotInfo.BufferPosition; ++i)
+ if (prefixBotInfo.Buffer.Get()[i] == '\n' || prefixBotInfo.Buffer.Get()[i] == 0) {
+ // Skip a terminator with no entry in progress, or one that directly follows the type byte.
+ // NOTE(review): in the second case beg is not reset — confirm that is intended.
+ if (beg < 0 || beg + 1 == (int)i)
+ continue;
+
+ char* s = prefixBotInfo.Buffer.Get() + beg;
+ if (store) {
+ switch (*s) {
+ case 'H':
+ HostDirective = s + 1;
+ break;
+ case 'S':
+ SiteMaps.insert(s + 1);
+ break;
+ case 'C':
+ ParseCrawlDelay(s + 1, botInfo.CrawlDelay);
+ break;
+ case 'P':
+ CleanParams.insert(s + 1);
+ break;
+ default:
+ // Allow/Disallow rule: remember the text (which points into the buffer) and its sort keys.
+ prefixBotInfo.Rules.Get()[n] = s + 1;
+ (*ruleInfos).Get()[n].Len = StrLenWithoutStars(s + 1);
+ (*ruleInfos).Get()[n].Allow = toupper(*s) == 'A';
+
+ prefixBotInfo.HasAllow |= toupper(*s) == 'A';
+ prefixBotInfo.HasDisallow |= toupper(*s) == 'D';
+ break;
+ }
+ }
+ n += (*s != 'H' && *s != 'S' && *s != 'C' && *s != 'P');
+ beg = -1;
+ } else if (beg < 0)
+ beg = i;
+
+ return n;
+}
+
+// Searches the Rules table from its end for a rule of exactly `neededType`
+// whose text is one of the match-everything patterns. The scan stops at the
+// first rule longer than three characters. Returns the rule index, or -1.
+int TPrefixTreeRobotsTxtRulesHandler::FindRuleAll(const TPrefixTreeBotInfo& prefixBotInfo, const char neededType) {
+    static const char* matchAllPatterns[] = {"*", "/", "*/", "/*", "*/*"};
+
+    int ruleNumber = prefixBotInfo.RulesSize;
+    while (--ruleNumber >= 0) {
+        const char* candidate = prefixBotInfo.Rules.Get()[ruleNumber];
+        if (strlen(candidate) > 3) {
+            break;
+        }
+
+        // The type byte is stored immediately before the rule text.
+        if (*(candidate - 1) != neededType) {
+            continue;
+        }
+
+        for (const char* pattern : matchAllPatterns) {
+            if (strcmp(pattern, candidate) == 0) {
+                return ruleNumber;
+            }
+        }
+    }
+    return -1;
+}
+
+// Reports whether any rule stored before index ruleAllAllow is a Disallow rule
+// (type 'D' or 'd').
+bool TPrefixTreeRobotsTxtRulesHandler::HasDisallowRulePrevAllowAll(const TPrefixTreeBotInfo& prefixBotInfo, int ruleAllAllow) {
+    int ruleNumber = ruleAllAllow;
+    while (--ruleNumber >= 0) {
+        const char* ruleText = prefixBotInfo.Rules.Get()[ruleNumber];
+        // The type byte is stored immediately before the rule text.
+        const char typeByte = ruleText[-1];
+        if (tolower(typeByte) == 'd') {
+            return true;
+        }
+    }
+    return false;
+}
+
+// Decides whether the bot's rules amount to "disallow all" (checkDisallow is
+// true) or "allow all" (checkDisallow is false).
+//  - No rules at all counts as "allow all".
+//  - "Disallow all" needs a match-all Disallow rule and no Allow rules.
+//  - "Allow all" holds when there are no Disallow rules, or when a match-all
+//    Allow rule exists with no Disallow rule stored before it.
+bool TPrefixTreeRobotsTxtRulesHandler::CheckAllowDisallowAll(const ui32 botId, const bool checkDisallow) {
+    CheckBotIdValidity(botId);
+
+    auto& info = GetInfo(botId);
+
+    if (info.RulesSize == 0) {
+        return !checkDisallow;
+    }
+    if (info.RulesPosition <= 0) {
+        return false;
+    }
+
+    if (checkDisallow) {
+        if (info.HasAllow) {
+            return false;
+        }
+        return FindRuleAll(info, 'D') >= 0;
+    }
+
+    const int allowAllIndex = FindRuleAll(info, 'A');
+    return allowAllIndex == -1
+        ? !info.HasDisallow
+        : !HasDisallowRulePrevAllowAll(info, allowAllIndex);
+}
+
+// Reorders the first `count` entries of the Rules table according to
+// TRulesSortFunc applied to the matching entries of *ruleInfos.
+// ruleInfos itself is left in its original order.
+void TPrefixTreeRobotsTxtRulesHandler::SortRules(
+    TPrefixTreeBotInfo& prefixBotInfo,
+    size_t count,
+    const TArrayHolder<TRuleInfo>* ruleInfos) {
+    // Sort a permutation of indexes rather than the rules themselves.
+    TVector<size_t> order(count);
+    for (size_t pos = 0; pos < count; ++pos) {
+        order[pos] = pos;
+    }
+    std::sort(order.begin(), order.end(), TRulesSortFunc(ruleInfos));
+
+    // Apply the permutation using a snapshot of the original table.
+    char** rules = prefixBotInfo.Rules.Get();
+    const TVector<char*> snapshot(rules, rules + count);
+    for (size_t pos = 0; pos < count; ++pos) {
+        rules[pos] = snapshot[order[pos]];
+    }
+}
+
+// Writes the sitemaps, clean-params and host directive held by the base class
+// back into the "any bot" buffer (used after that buffer has been cleared).
+void TPrefixTreeRobotsTxtRulesHandler::SaveRulesToBuffer() {
+    const auto anyBot = robotstxtcfg::id_anybot;
+
+    for (const auto& sitemapUrl : SiteMaps) {
+        AddRule(anyBot, sitemapUrl, 'S');
+    }
+    for (const auto& cleanParam : CleanParams) {
+        AddRule(anyBot, cleanParam, 'P');
+    }
+    if (HostDirective.empty()) {
+        return;
+    }
+    AddRule(anyBot, HostDirective, 'H');
+}
+
+// Rebuilds the bot's lookup structures from its raw buffer: the sorted Rules
+// table, the AllowAll/DisallowAll flags, the prefix tree for prefix rules and
+// the ComplexRules table for the remaining (wildcard) rules.
+void TPrefixTreeRobotsTxtRulesHandler::SaveRulesFromBuffer(const ui32 botId) {
+ CheckBotIdValidity(botId);
+
+ auto& botInfo = GetInfo(botId);
+
+ TArrayHolder<TRuleInfo> ruleInfos;
+
+ // First pass only counts the rules; the second pass stores them.
+ int n = TraceBuffer(botId, -1, nullptr), countPrefix = 0;
+ ruleInfos.Reset(new TRuleInfo[n]);
+ botInfo.RulesPosition = TraceBuffer(botId, n, &ruleInfos);
+ assert(botInfo.RulesPosition == n);
+
+ SortRules(botInfo, n, &ruleInfos);
+
+ botInfo.DisallowAll = CheckAllowDisallowAll(botId, true);
+ botInfo.AllowAll = CheckAllowDisallowAll(botId, false);
+
+ // An uppercase type byte (stored just before the rule text) marks a prefix rule.
+ for (int i = 0; i < n; ++i)
+ countPrefix += !!isupper(*(botInfo.Rules.Get()[i] - 1));
+
+ botInfo.PrefixRules.Init(countPrefix);
+ botInfo.ComplexRules.Reset(new char**[botInfo.ComplexRulesSize = n - countPrefix]);
+ botInfo.ComplexRulesPosition = 0;
+
+ // Prefix rules go into the tree keyed by their index in Rules; every other
+ // rule is referenced from ComplexRules by a pointer to its Rules slot.
+ for (int i = 0; i < n; ++i) {
+ char* s = botInfo.Rules.Get()[i];
+ if (isupper(*(s - 1)))
+ botInfo.PrefixRules.Add(s, i);
+ else
+ botInfo.ComplexRules.Get()[botInfo.ComplexRulesPosition++] = &botInfo.Rules.Get()[i];
+ }
+ botInfo.PrefixRules.Compress();
+}
+
+// Finalizes a bot after parsing: shrinks its buffer to the bytes actually
+// used, builds the lookup structures, and adds a default "Allow: /" rule when
+// no Allow/Disallow rules were found.
+void TPrefixTreeRobotsTxtRulesHandler::AfterParse(const ui32 botId) {
+ CheckBotIdValidity(botId);
+
+ auto& botInfo = GetInfo(botId);
+
+ ResizeBuffer(botId, botInfo.BufferPosition);
+ SaveRulesFromBuffer(botId);
+
+ // NOTE(review): this rule is appended after the lookup structures were built,
+ // so it is present in the buffer but not in the Rules table — confirm intended.
+ if (botInfo.RulesPosition == 0) {
+ AddRule(botId, "/", 'A');
+ }
+}
+
+// Returns the bot's info, creating a default one on first access.
+// Fails via Y_ENSURE when botId is not below max_botid.
+TPrefixTreeRobotsTxtRulesHandler::TPrefixTreeBotInfo& TPrefixTreeRobotsTxtRulesHandler::GetInfo(ui32 botId) {
+    Y_ENSURE(botId < robotstxtcfg::max_botid);
+    auto& slot = BotIdToPrefixTreeBotInfo[botId];
+    if (slot) {
+        return *slot;
+    }
+    slot = MakeHolder<TPrefixTreeBotInfo>();
+    return *slot;
+}
+
+// Matches string `s` against the wildcard pattern `rule`, where '*' stands for
+// any run of characters. Returns true when, after consuming all of `s`, the
+// pattern is exhausted or only a single trailing '*' is left.
+// The commented-out assert below suggests the pattern is expected to contain
+// no "**" (NormalizeRule collapses such runs).
+bool TPrefixTreeRobotsTxtRulesHandler::CheckRule(const char* s, const char* rule) {
+ const char* r = rule;
+ const char* s_end = s + strlen(s);
+ const char* r_end = r + strlen(r);
+ // assert( r && !strstr(r, "**") );
+ for (; *s; ++s) {
+ // Early exit when the remaining pattern is more than twice as long as the remaining input.
+ // NOTE(review): presumably a cheap "cannot match any more" bound — confirm the factor of 2.
+ if ((s_end - s + 1) * 2 < (r_end - r))
+ return 0;
+ while (*r == '*')
+ ++r;
+
+ if (*s == *r) {
+ ++r;
+ } else {
+ // Mismatch: rewind the pattern to the nearest preceding '*' (or to its start).
+ while (r != rule && *r != '*')
+ --r;
+
+ // No '*' to fall back to means the match fails.
+ if (*r != '*')
+ return 0;
+ if (*r == '*')
+ ++r;
+ // Retry the current input character right after that '*'.
+ if (*r == *s)
+ ++r;
+ }
+ }
+ return !*r || (!*(r + 1) && *r == '*');
+}
+
+// Reports whether adding a rule of `length` bytes would exceed the bot's
+// limits: either the rule count has reached MaxRulesNumber, or the buffer
+// position plus the rule plus a 300-byte margin exceeds RobotsMaxSize.
+// A bot without info is never full.
+bool TPrefixTreeRobotsTxtRulesHandler::IsFull(ui32 botId, size_t length) const {
+    Y_ENSURE(botId < robotstxtcfg::max_botid);
+    const auto& info = BotIdToPrefixTreeBotInfo[botId];
+    if (!info) {
+        return false;
+    }
+
+    const bool tooManyRules = size_t(info->RulesPosition) >= MaxRulesNumber;
+    const bool tooManyBytes = info->BufferPosition + length + 300 > size_t(RobotsMaxSize);
+    return tooManyRules || tooManyBytes;
+}
+
+// Same limits as IsFull, applied to the rule counts and buffer positions
+// summed over all bots that have info.
+bool TPrefixTreeRobotsTxtRulesHandler::IsFullTotal() const {
+    size_t totalRules = 0;
+    size_t totalBytes = 0;
+
+    for (const auto& info : BotIdToPrefixTreeBotInfo) {
+        if (!info) {
+            continue;
+        }
+        totalRules += info->RulesPosition;
+        totalBytes += info->BufferPosition;
+    }
+
+    const bool tooManyRules = totalRules >= MaxRulesNumber;
+    const bool tooManyBytes = totalBytes + 300 > size_t(RobotsMaxSize);
+    return tooManyRules || tooManyBytes;
+}
+
+// Serializes the rules of all supported bots into a thread-local buffer.
+// Layout: ui32 bot count, then for each supported bot a ui32 bot id followed
+// by the bot's buffer, whose first 4 bytes hold the buffer length (a bot
+// without info is written as the bare length 4).
+// `data` is set to the start of the packed bytes; the return value is their
+// size. The pointer refers to the thread-local buffer, which is reused by
+// later GetPacked calls on the same thread.
+size_t TPrefixTreeRobotsTxtRulesHandler::GetPacked(const char*& data) const {
+    Y_STATIC_THREAD(TBuffer)
+    packedRepresentation;
+    TBuffer& packed = (TBuffer&)packedRepresentation;
+
+    // calculate size, needed for packed data
+    size_t totalPackedSize = sizeof(ui32); // num of botids
+    ui32 numOfSupportedBots = 0;
+
+    for (ui32 botId = 0; botId < robotstxtcfg::max_botid; ++botId) {
+        if (!IsBotIdSupported(botId)) {
+            continue;
+        }
+
+        const auto& botInfo = BotIdToPrefixTreeBotInfo[GetNotOptimizedBotId(botId)];
+        // botId + packedDataSize + packedData
+        totalPackedSize += sizeof(ui32) + (botInfo ? botInfo->BufferPosition : sizeof(ui32));
+        ++numOfSupportedBots;
+    }
+
+    packed.Reserve(totalPackedSize);
+
+    // fill packed data
+    char* packedPtr = packed.Data();
+
+    // memcpy instead of a pointer cast keeps the stores alignment-safe.
+    memcpy(packedPtr, &numOfSupportedBots, sizeof(ui32));
+    packedPtr += sizeof(ui32);
+
+    // The loop variable is a ui32 so that exactly the id's own 4 bytes are
+    // copied; copying 4 bytes out of a wider integer is endianness-dependent.
+    for (ui32 botId = 0; botId < robotstxtcfg::max_botid; ++botId) {
+        if (!IsBotIdSupported(botId)) {
+            continue;
+        }
+
+        const auto& botInfo = BotIdToPrefixTreeBotInfo[GetNotOptimizedBotId(botId)];
+        memcpy(packedPtr, &botId, sizeof(ui32));
+        packedPtr += sizeof(ui32);
+
+        if (botInfo) {
+            // Refresh the size prefix stored at the start of the bot's buffer.
+            const ui32 usedBytes = botInfo->BufferPosition;
+            memcpy(botInfo->Buffer.Get(), &usedBytes, sizeof(ui32));
+            memcpy(packedPtr, botInfo->Buffer.Get(), usedBytes);
+            packedPtr += usedBytes;
+        } else {
+            // In absence of bot info only the size of its buffer is serialized,
+            // which is 4 because the size prefix itself takes 4 bytes.
+            ui32 emptyBufferPosition = sizeof(ui32);
+            memcpy(packedPtr, &emptyBufferPosition, sizeof(ui32));
+            packedPtr += sizeof(ui32);
+        }
+    }
+
+    data = packed.Data();
+    return totalPackedSize;
+}
+
+// Restores the handler from data produced by GetPacked.
+// Layout: ui32 bot count, then for each bot a ui32 bot id followed by the bot's
+// buffer, whose first 4 bytes hold the buffer length (counting the prefix).
+// Records for unknown or unsupported bot ids are skipped.
+// When botsDataEnd is given, every read is checked against it and a yexception
+// is thrown on truncated input; a size prefix that is smaller than 4 or does
+// not fit a signed int is rejected in all cases.
+void TPrefixTreeRobotsTxtRulesHandler::LoadPacked(const char* botsData, const char* botsDataEnd) {
+    Clear();
+
+    // True when fewer than `need` bytes are left before botsDataEnd.
+    // Without an end pointer the input length is unknown and nothing is checked.
+    const auto isTruncated = [&botsData, botsDataEnd](size_t need) {
+        return botsDataEnd != nullptr && (botsData >= botsDataEnd || size_t(botsDataEnd - botsData) < need);
+    };
+
+    if (Y_UNLIKELY(isTruncated(sizeof(ui32)))) {
+        ythrow yexception() << "Buffer overflow";
+    }
+
+    // memcpy instead of a pointer cast: packed data carries no alignment guarantee.
+    ui32 numOfBots = 0;
+    memcpy(&numOfBots, botsData, sizeof(ui32));
+    botsData += sizeof(ui32);
+
+    for (ui32 botIndex = 0; botIndex < numOfBots; ++botIndex) {
+        // Each record has at least a bot id and a size prefix.
+        if (Y_UNLIKELY(isTruncated(2 * sizeof(ui32)))) {
+            ythrow yexception() << "Buffer overflow";
+        }
+
+        ui32 botId = 0;
+        memcpy(&botId, botsData, sizeof(ui32));
+        botsData += sizeof(ui32);
+
+        ui32 packedSize = 0;
+        memcpy(&packedSize, botsData, sizeof(ui32));
+
+        // The size must cover its own prefix and fit the signed BufferSize field;
+        // anything else would lead to an under-sized or invalid allocation below.
+        if (Y_UNLIKELY(packedSize < sizeof(ui32) || (packedSize >> 31) != 0)) {
+            ythrow yexception() << "Invalid packed robots size";
+        }
+        if (Y_UNLIKELY(isTruncated(packedSize))) {
+            ythrow yexception() << "Buffer overflow";
+        }
+
+        // skip bot ids that are not supported for now
+        if (botId >= robotstxtcfg::max_botid || !IsBotIdSupported(botId)) {
+            botsData += packedSize;
+            continue;
+        }
+
+        auto& botInfo = GetInfo(botId);
+
+        static_assert(sizeof(botInfo.BufferSize) == sizeof(ui32), "BufferSize must be 4 bytes");
+        static_assert(sizeof(botInfo.BufferPosition) == sizeof(ui32), "BufferPosition must be 4 bytes");
+
+        botInfo.BufferSize = static_cast<int>(packedSize);
+        botInfo.BufferPosition = packedSize;
+
+        botInfo.Buffer.Reset(new char[packedSize]);
+        memcpy(botInfo.Buffer.Get(), botsData, packedSize);
+        SaveRulesFromBuffer(botId);
+
+        // A buffer holding only the size prefix means there is no section for this bot.
+        if (packedSize > sizeof(ui32)) {
+            LoadedBotIds.insert(botId);
+        }
+
+        botsData += packedSize;
+    }
+
+    OptimizeSize();
+}
+
+// Prints the bot's name and all of its stored directives to dumpFile
+// (stderr when dumpFile is null).
+void TPrefixTreeRobotsTxtRulesHandler::Dump(const ui32 botId, FILE* dumpFile) {
+    FILE* const out = dumpFile ? dumpFile : stderr;
+
+    fprintf(out, "User-Agent: %s\n", robotstxtcfg::GetFullName(botId).data());
+
+    TRobotsTxtRulesIterator it = GetRulesIterator(botId);
+    while (it.HasRule()) {
+        fprintf(out, "%s: %s\n", DirTypeToName(it.GetRuleType()), it.GetInitialRule().data());
+        it.Next();
+    }
+}
+
+// Prints the bot's name and all of its stored directives to the given stream.
+void TPrefixTreeRobotsTxtRulesHandler::Dump(const ui32 botId, IOutputStream& out) {
+    out << "User-Agent: " << robotstxtcfg::GetFullName(botId) << Endl;
+
+    TRobotsTxtRulesIterator it = GetRulesIterator(botId);
+    while (it.HasRule()) {
+        out << DirTypeToName(it.GetRuleType()) << ": " << it.GetInitialRule() << Endl;
+        it.Next();
+    }
+}
diff --git a/library/cpp/robots_txt/robots_txt.h b/library/cpp/robots_txt/robots_txt.h
new file mode 100644
index 0000000000..5ee48fb14f
--- /dev/null
+++ b/library/cpp/robots_txt/robots_txt.h
@@ -0,0 +1,605 @@
+#pragma once
+
+#include "constants.h"
+#include "robots_txt_parser.h"
+#include "prefix_tree.h"
+#include "robotstxtcfg.h"
+
+#include <util/generic/noncopyable.h>
+#include <util/generic/map.h>
+#include <util/generic/maybe.h>
+#include <util/generic/ptr.h>
+#include <util/generic/set.h>
+
+#include <array>
+#include <utility>
+
+
+// Kinds of robots.txt directives. Note that the value 8 is not used.
+enum EDirectiveType {
+ USER_AGENT = 1,
+ DISALLOW = 2,
+ ALLOW = 3,
+ HOST = 4,
+ SITEMAP = 5,
+ CRAWL_DELAY = 6,
+ CLEAN_PARAM = 7,
+ UNKNOWN = 9,
+};
+
+// Codes of the problems recorded while parsing robots.txt (stored together with
+// a line number in TRobotsTxtRulesHandlerBase::TErrorVector).
+// ERROR_* codes start at 1, WARNING_* codes start at 30; the value 32 is not used.
+enum EFormatErrorType {
+ ERROR_RULE_NOT_SLASH = 1,
+ ERROR_ASTERISK_MULTI = 2,
+ ERROR_HOST_MULTI = 3,
+ ERROR_ROBOTS_HUGE = 4,
+ ERROR_RULE_BEFORE_USER_AGENT = 5,
+ ERROR_RULE_HUGE = 6,
+ ERROR_HOST_FORMAT = 7,
+ ERROR_TRASH = 8,
+ ERROR_SITEMAP_FORMAT = 9,
+ ERROR_CRAWL_DELAY_FORMAT = 10,
+ ERROR_CRAWL_DELAY_MULTI = 11,
+ ERROR_CLEAN_PARAM_FORMAT = 12,
+
+ WARNING_EMPTY_RULE = 30,
+ WARNING_SUSPECT_SYMBOL = 31,
+ WARNING_UNKNOWN_FIELD = 33,
+ WARNING_UPPER_REGISTER = 34,
+ WARNING_SITEMAP = 35,
+};
+
+// Forward iterator over the directives stored in a [begin, end) byte range of
+// a rules buffer. A default-constructed iterator has a null range.
+class TRobotsTxtRulesIterator {
+private:
+ const char* Begin = nullptr;
+ const char* End = nullptr;
+
+public:
+ TRobotsTxtRulesIterator() = default;
+ TRobotsTxtRulesIterator(const char* begin, const char* end);
+ // Typical use: for (it = ...; it.HasRule(); it.Next()) { ... }
+ void Next();
+ bool HasRule() const;
+ const char* GetRule() const;
+ TString GetInitialRule() const; // unlike GetRule(), it neither omits trailing '$' nor adds redundant '*'
+ EDirectiveType GetRuleType() const;
+
+ static EDirectiveType CharToDirType(char ch);
+};
+
+// Abstract base of robots.txt rule handlers. It keeps the data shared by all
+// bots (host directive, sitemaps, clean-params, errors, accepted lines, crawl
+// delays) and declares the per-bot query/storage interface that concrete
+// handlers implement.
+class TRobotsTxtRulesHandlerBase {
+public:
+ // Parsing problems as (error code, line number) pairs.
+ typedef TVector<std::pair<EFormatErrorType, int>> TErrorVector;
+
+ TRobotsTxtRulesHandlerBase(
+ TBotIdSet supportedBotIds,
+ int robotsMaxSize,
+ int maxRulesNumber,
+ bool saveDataForAnyBot);
+
+ TRobotsTxtRulesHandlerBase(
+ const TSet<ui32>& supportedBotIds,
+ int robotsMaxSize,
+ int maxRulesNumber,
+ bool saveDataForAnyBot);
+
+ virtual ~TRobotsTxtRulesHandlerBase();
+
+ int GetCrawlDelay(ui32 botId, bool* realInfo = nullptr) const;
+ int GetMinCrawlDelay(int defaultCrawlDelay = -1) const;
+ bool IsHandlingErrors() const;
+ const TString& GetHostDirective() const;
+ const TVector<TString> GetSiteMaps() const;
+ const TVector<TString> GetCleanParams() const;
+ const TErrorVector& GetErrors() const;
+ TVector<int> GetAcceptedLines(ui32 botId = robotstxtcfg::id_yandexbot) const;
+
+ template <class THostHandler>
+ static int ParseRules(TRobotsTxtParser& parser, TRobotsTxtRulesHandlerBase* rulesHandler, THostHandler* hostHandler, const char* host = nullptr);
+ static inline void ClearAllExceptCrossSection(TRobotsTxtParser& parser, TRobotsTxtRulesHandlerBase* rulesHandler, ui32 botId);
+ static int CheckHost(const char* host);
+ static int CheckSitemapUrl(const char* url, const char* host, TString& modifiedUrl);
+ static int CheckRule(const char* value, int line, TRobotsTxtRulesHandlerBase* rulesHandler);
+ static int CheckAndNormCleanParam(TString& s);
+ static int ParseCrawlDelay(const char* value, int& crawlDelay);
+ static EDirectiveType NameToDirType(const char* d);
+ static const char* DirTypeToName(EDirectiveType t);
+
+ void SetErrorsHandling(bool handleErrors);
+ void SetHostDirective(const char* hostDirective);
+ void SetCrawlDelay(ui32 botId, int crawlDelay);
+ void AddAcceptedLine(ui32 line, const TBotIdSet& botIds, bool isCrossSection);
+ void AddSiteMap(const char* sitemap);
+ void AddCleanParam(const char* cleanParam);
+ bool AddRuleWithErrorCheck(ui32 botId, TStringBuf rule, char type, TRobotsTxtParser& parser);
+ int OnHost(ui32 botId, TRobotsTxtParser& parser, const char* value, TRobotsTxtRulesHandlerBase*& rulesHandler);
+
+ // Interface implemented by concrete handlers.
+ virtual void Clear();
+ virtual bool IsAllowAll(ui32 botId) const = 0;
+ virtual bool IsAllowAll() const = 0;
+ virtual bool IsDisallowAll(ui32 botId, bool useAny = true) const = 0;
+ virtual bool IsDisallowAll() const = 0;
+ virtual const char* IsDisallow(ui32 botId, const char* s, bool useAny = true) const = 0;
+ virtual const char* IsAllow(ui32 botId, const char* s) const = 0;
+ virtual TRobotsTxtRulesIterator GetRulesIterator(ui32 botId) const = 0;
+ virtual void Dump(ui32 botId, FILE* logFile) = 0;
+ virtual void Dump(ui32 botId, IOutputStream& out) = 0;
+ virtual bool Empty(ui32 botId) const = 0;
+ virtual void LoadPacked(const char* botsData, const char* botsDataEnd = nullptr) = 0;
+ virtual size_t GetPacked(const char*& data) const = 0;
+ virtual void AfterParse(ui32 botId) = 0;
+ virtual void DoAllowAll() = 0;
+ virtual void DoDisallowAll() = 0;
+ bool IsBotIdLoaded(ui32 botId) const;
+ bool IsBotIdSupported(ui32 botId) const;
+ ui32 GetNotOptimizedBotId(ui32 botId) const;
+ // May return an empty TMaybe; callers must check before dereferencing.
+ TMaybe<ui32> GetMappedBotId(ui32 botId, bool useAny = true) const;
+
+protected:
+ void CheckBotIdValidity(ui32 botId) const;
+ virtual bool OptimizeSize() = 0;
+
+private:
+ bool HandleErrors;
+
+protected:
+ // Per-bot data kept by the base class.
+ struct TBotInfo {
+ int CrawlDelay;
+
+ // -1 is the initial crawl delay value.
+ TBotInfo()
+ : CrawlDelay(-1)
+ {
+ }
+ };
+
+ TBotIdSet LoadedBotIds;
+ TSet<TString> SiteMaps;
+ TSet<TString> CleanParams;
+ TString HostDirective;
+ TErrorVector Errors;
+ typedef std::pair<ui32, ui32> TBotIdAcceptedLine;
+ TVector<TBotIdAcceptedLine> AcceptedLines;
+ TVector<ui32> CrossSectionAcceptedLines;
+
+ TVector<TBotInfo> BotIdToInfo;
+ int CrawlDelay;
+ size_t RobotsMaxSize;
+ size_t MaxRulesNumber;
+ bool SaveDataForAnyBot;
+
+ TBotIdSet SupportedBotIds;
+ // For each bot id, the id of the bot whose stored rules it shares.
+ std::array<ui8, robotstxtcfg::max_botid> OptimizedBotIdToStoredBotId;
+
+ virtual bool IsFull(ui32 botId, size_t length) const = 0;
+ virtual bool IsFullTotal() const = 0;
+ virtual bool AddRule(ui32 botId, TStringBuf rule, char type) = 0;
+ //parts of ParseRules
+ inline static void CheckRobotsLines(TRobotsTxtRulesHandlerBase* rulesHandler, TVector<int>& nonRobotsLines);
+ inline static void CheckAsterisk(TRobotsTxtRulesHandlerBase* rulesHandler, const char* value, ui32 lineNumber, bool& wasAsterisk);
+ inline static bool CheckWasUserAgent(TRobotsTxtRulesHandlerBase* rulesHandler, bool wasUserAgent, bool& ruleBeforeUserAgent, bool& wasRule, ui32 lineNumber);
+ inline static bool CheckRuleNotSlash(TRobotsTxtRulesHandlerBase* rulesHandler, const char* value, ui32 lineNumber);
+ inline static bool CheckSupportedBots(const TBotIdSet& currentBotIds, TBotIdSet& wasRuleForBot, const TBotIdSet& isSupportedBot);
+ inline static bool CheckEmptyRule(TRobotsTxtRulesHandlerBase* rulesHandler, const char* value, EDirectiveType& type, ui32 lineNumber);
+ inline static bool ProcessSitemap(TRobotsTxtRulesHandlerBase* rulesHandler, TRobotsTxtParser& parser, const char* value, const char* host);
+ inline static bool ProcessCleanParam(TRobotsTxtRulesHandlerBase* rulesHandler, TRobotsTxtParser& parser, TString& value);
+ inline static bool AddRules(
+ TRobotsTxtRulesHandlerBase* rulesHandler,
+ TRobotsTxtParser& parser,
+ const char* value,
+ char type,
+ const TBotIdSet& currentBotIds,
+ const TBotIdSet& isSupportedBot);
+
+ inline static bool ProcessCrawlDelay(
+ TRobotsTxtRulesHandlerBase* rulesHandler,
+ TRobotsTxtParser& parser,
+ const TBotIdSet& currentBotIds,
+ const TBotIdSet& isSupportedBot,
+ const char* value);
+
+ inline static void ProcessUserAgent(
+ TRobotsTxtRulesHandlerBase* rulesHandler,
+ TRobotsTxtParser& parser,
+ const TBotIdSet& currentBotIds,
+ TBotIdSet& wasRuleForBot,
+ TBotIdSet& isSupportedBot,
+ TVector<ui32>& botIdToMaxAppropriateUserAgentNameLength,
+ const char* value);
+
+ bool CheckRobot(
+ const char* userAgent,
+ TBotIdSet& botIds,
+ const TVector<ui32>* botIdToMaxAppropriateUserAgentNameLength = nullptr) const;
+
+ virtual void ClearInternal(ui32 botId);
+
+ void AddError(EFormatErrorType type, int line);
+
+ void ResetOptimized() noexcept;
+};
+
+// Rules handler that stores each bot's directives in a flat byte buffer and
+// answers Allow/Disallow queries through a prefix tree (for prefix rules) plus
+// a list of wildcard ("complex") rules.
+class TPrefixTreeRobotsTxtRulesHandler: public TRobotsTxtRulesHandlerBase, TNonCopyable {
+private:
+ // Initial size in bytes of a bot's rule buffer (64).
+ static const int INIT_BUFFER_SIZE = 1 << 6;
+
+ // Sort keys of one rule.
+ struct TRuleInfo {
+ size_t Len; // rule length not counting '*' characters
+ bool Allow; // true for Allow rules, false for Disallow rules
+ };
+
+ bool IsFull(ui32 botId, size_t length) const override;
+ bool IsFullTotal() const override;
+ bool AddRule(ui32 botId, TStringBuf rule, char type) override;
+ const char* GetRule(ui32 botId, const char* s, char type) const;
+ void ResizeBuffer(ui32 botId, int newSize);
+ void SaveRulesFromBuffer(ui32 botId);
+ int TraceBuffer(ui32 botId, int countRules, const TArrayHolder<TRuleInfo>* ruleInfos);
+ bool CheckAllowDisallowAll(ui32 botId, bool checkDisallow);
+ void SaveRulesToBuffer();
+ int StrLenWithoutStars(const char* s);
+
+protected:
+ // Orders rule indexes: longer rules first; among rules of equal length,
+ // Allow rules come before Disallow rules.
+ class TRulesSortFunc {
+ private:
+ const TArrayHolder<TRuleInfo>* RuleInfos;
+
+ public:
+ TRulesSortFunc(const TArrayHolder<TRuleInfo>* ruleInfos)
+ : RuleInfos(ruleInfos)
+ {
+ }
+ bool operator()(const size_t& lhs, const size_t& rhs) {
+ const TRuleInfo& left = (*RuleInfos).Get()[lhs];
+ const TRuleInfo& right = (*RuleInfos).Get()[rhs];
+ return (left.Len == right.Len) ? left.Allow && !right.Allow : left.Len > right.Len;
+ }
+ };
+
+ // Everything stored for one bot.
+ struct TPrefixTreeBotInfo {
+ bool DisallowAll = false;
+ bool AllowAll = false;
+ bool HasDisallow = false;
+ bool HasAllow = false;
+
+ // Raw directive storage. The first sizeof(BufferPosition) bytes are a size
+ // prefix, so BufferPosition (the next write offset) starts right after it.
+ TArrayHolder<char> Buffer{new char[INIT_BUFFER_SIZE]};
+ ui32 BufferPosition = sizeof(BufferPosition);
+ int BufferSize = INIT_BUFFER_SIZE;
+
+ // Pointers to the texts of the Allow/Disallow rules inside Buffer.
+ TArrayHolder<char*> Rules = nullptr;
+ int RulesPosition = 0;
+ int RulesSize = 0;
+
+ // Pointers to the Rules slots of the rules that need wildcard matching.
+ TArrayHolder<char**> ComplexRules = nullptr;
+ int ComplexRulesPosition = 0;
+ int ComplexRulesSize = 0;
+
+ // Prefix rules, keyed by rule text and mapped to the rule's index in Rules.
+ TPrefixTree PrefixRules {0};
+ };
+
+ // Per-bot info, created lazily by GetInfo(); an empty holder means "no info".
+ std::array<THolder<TPrefixTreeBotInfo>, robotstxtcfg::max_botid> BotIdToPrefixTreeBotInfo;
+
+ TPrefixTreeBotInfo& GetInfo(ui32 botId);
+ static bool CheckRule(const char* s, const char* rule);
+ void ClearInternal(ui32 botId) override;
+ bool OptimizeSize() override;
+
+private:
+ void SortRules(TPrefixTreeBotInfo& prefixBotInfo, size_t count, const TArrayHolder<TRuleInfo>* ruleInfos);
+ bool HasDisallowRulePrevAllowAll(const TPrefixTreeBotInfo& prefixBotInfo, int ruleAllAllow);
+ int FindRuleAll(const TPrefixTreeBotInfo& prefixBotInfo, char neededType);
+
+public:
+ TPrefixTreeRobotsTxtRulesHandler(
+ TBotIdSet supportedBotIds = robotstxtcfg::defaultSupportedBotIds,
+ int robotsMaxSize = robots_max,
+ int maxRulesCount = -1,
+ bool saveDataForAnyBot = true);
+
+ TPrefixTreeRobotsTxtRulesHandler(
+ std::initializer_list<ui32> supportedBotIds,
+ int robotsMaxSize = robots_max,
+ int maxRulesCount = -1,
+ bool saveDataForAnyBot = true);
+
+ TPrefixTreeRobotsTxtRulesHandler(
+ const TSet<ui32>& supportedBotIds,
+ int robotsMaxSize = robots_max,
+ int maxRulesCount = -1,
+ bool saveDataForAnyBot = true);
+
+ void Clear() override;
+ void AfterParse(ui32 botId) override;
+ bool IsAllowAll(ui32 botId) const override;
+ bool IsAllowAll() const override;
+ bool IsDisallowAll(ui32 botId, bool useAny = true) const override;
+ bool IsDisallowAll() const override;
+ const char* IsDisallow(ui32 botId, const char* s, bool useAny = true) const override;
+ const char* IsAllow(ui32 botId, const char* s) const override;
+ TRobotsTxtRulesIterator GetRulesIterator(ui32 botId) const override;
+ void DoAllowAll() override;
+ void DoDisallowAll() override;
+ bool Empty(ui32 botId) const override;
+
+ void LoadPacked(const char* botsData, const char* botsDataEnd = nullptr) override;
+ size_t GetPacked(const char*& data) const override;
+ void Dump(ui32 botId, FILE* logFile) override;
+ void Dump(ui32 botId, IOutputStream& out) override;
+ size_t GetMemorySize();
+};
+
+using TRobotsTxt = TPrefixTreeRobotsTxtRulesHandler;
+
+// Clears what the handler stores for `botId` via ClearInternal(). When the
+// cleared bot is the wildcard one (id_anybot), the sitemap, clean-param and
+// host directives remembered on the handler are added back as rules.
+void TRobotsTxtRulesHandlerBase::ClearAllExceptCrossSection(TRobotsTxtParser& parser, TRobotsTxtRulesHandlerBase* rulesHandler, ui32 botId) {
+    rulesHandler->ClearInternal(botId);
+    if (botId != robotstxtcfg::id_anybot) {
+        return;
+    }
+
+    // The clear above also removed sitemaps, clean-params and the host
+    // directive from the prefix tree, so re-register them.
+    for (const auto& sitemapUrl : rulesHandler->SiteMaps) {
+        rulesHandler->AddRuleWithErrorCheck(robotstxtcfg::id_anybot, sitemapUrl, 'S', parser);
+    }
+    for (const auto& cleanParam : rulesHandler->CleanParams) {
+        rulesHandler->AddRuleWithErrorCheck(robotstxtcfg::id_anybot, cleanParam, 'P', parser);
+    }
+    const bool hasHostDirective = !rulesHandler->HostDirective.empty();
+    if (hasHostDirective) {
+        rulesHandler->AddRuleWithErrorCheck(robotstxtcfg::id_anybot, rulesHandler->HostDirective, 'H', parser);
+    }
+}
+
+// When the handler collects errors, reports every queued line number as
+// ERROR_TRASH and empties the queue; otherwise leaves the queue untouched.
+void TRobotsTxtRulesHandlerBase::CheckRobotsLines(TRobotsTxtRulesHandlerBase* rulesHandler, TVector<int>& nonRobotsLines) {
+    if (!rulesHandler->IsHandlingErrors()) {
+        return;
+    }
+    for (int trashLineNumber : nonRobotsLines) {
+        rulesHandler->AddError(ERROR_TRASH, trashLineNumber);
+    }
+    nonRobotsLines.clear();
+}
+
+// Tracks occurrences of the "*" user-agent value. The first one only sets
+// `wasAsterisk`; any later one also reports ERROR_ASTERISK_MULTI.
+void TRobotsTxtRulesHandlerBase::CheckAsterisk(TRobotsTxtRulesHandlerBase* rulesHandler, const char* value, ui32 lineNumber, bool& wasAsterisk) {
+    const bool isAsterisk = strcmp(value, "*") == 0;
+    if (!isAsterisk) {
+        return;
+    }
+    if (wasAsterisk) {
+        rulesHandler->AddError(ERROR_ASTERISK_MULTI, lineNumber);
+    }
+    wasAsterisk = true;
+}
+
+// Returns true when a rule appears before any User-agent line, meaning the
+// caller should skip it. ERROR_RULE_BEFORE_USER_AGENT is reported only the
+// first time, guarded by `ruleBeforeUserAgent`. When a User-agent was already
+// seen, marks `wasRule` and returns false.
+bool TRobotsTxtRulesHandlerBase::CheckWasUserAgent(TRobotsTxtRulesHandlerBase* rulesHandler, bool wasUserAgent, bool& ruleBeforeUserAgent, bool& wasRule, ui32 lineNumber) {
+    if (!wasUserAgent) {
+        if (!ruleBeforeUserAgent) {
+            ruleBeforeUserAgent = true;
+            rulesHandler->AddError(ERROR_RULE_BEFORE_USER_AGENT, lineNumber);
+        }
+        return true;
+    }
+
+    wasRule = true;
+    return false;
+}
+
+// A non-empty rule value must start with '/' or '*'. Reports
+// ERROR_RULE_NOT_SLASH and returns true otherwise; returns false for an
+// empty value or a value with an accepted first character.
+bool TRobotsTxtRulesHandlerBase::CheckRuleNotSlash(TRobotsTxtRulesHandlerBase* rulesHandler, const char* value, ui32 lineNumber) {
+    const char firstChar = *value;
+    const bool acceptable = !firstChar || firstChar == '/' || firstChar == '*';
+    if (acceptable) {
+        return false;
+    }
+    rulesHandler->AddError(ERROR_RULE_NOT_SLASH, lineNumber);
+    return true;
+}
+
+// Records every id from `currentBotIds` in `wasRuleForBot` and returns whether
+// at least one of those ids is present in `isSupportedBot`.
+bool TRobotsTxtRulesHandlerBase::CheckSupportedBots(
+    const TBotIdSet& currentBotIds,
+    TBotIdSet& wasRuleForBot,
+    const TBotIdSet& isSupportedBot)
+{
+    bool anySupported = false;
+    for (ui32 botId : currentBotIds) {
+        wasRuleForBot.insert(botId);
+        if (!anySupported && isSupportedBot.contains(botId)) {
+            anySupported = true;
+        }
+    }
+    return anySupported;
+}
+
+// Handles a rule with an empty value: reports WARNING_EMPTY_RULE, flips
+// `type` (ALLOW becomes DISALLOW, anything else becomes ALLOW) and returns
+// true. Returns false without side effects for a null or non-empty value.
+bool TRobotsTxtRulesHandlerBase::CheckEmptyRule(TRobotsTxtRulesHandlerBase* rulesHandler, const char* value, EDirectiveType& type, ui32 lineNumber) {
+    if (value == nullptr || *value != '\0') {
+        return false;
+    }
+    rulesHandler->AddError(WARNING_EMPTY_RULE, lineNumber);
+    if (type == ALLOW) {
+        type = DISALLOW;
+    } else {
+        type = ALLOW;
+    }
+    return true;
+}
+
+// Adds the rule (`value`, `type`) for every bot in `currentBotIds` that is
+// also in `isSupportedBot`. Returns true as soon as AddRuleWithErrorCheck
+// fails for some bot, false when every attempted addition succeeded.
+bool TRobotsTxtRulesHandlerBase::AddRules(
+    TRobotsTxtRulesHandlerBase* rulesHandler,
+    TRobotsTxtParser& parser,
+    const char* value,
+    char type,
+    const TBotIdSet& currentBotIds,
+    const TBotIdSet& isSupportedBot)
+{
+    for (ui32 botId : currentBotIds) {
+        if (isSupportedBot.contains(botId)) {
+            if (!rulesHandler->AddRuleWithErrorCheck(botId, value, type, parser)) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+// Handles a Sitemap directive. A value rejected by CheckSitemapUrl is reported
+// as ERROR_SITEMAP_FORMAT. An accepted one is stored with AddSiteMap and added
+// as an 'S' rule for id_anybot. Returns true only when adding the rule fails.
+bool TRobotsTxtRulesHandlerBase::ProcessSitemap(TRobotsTxtRulesHandlerBase* rulesHandler, TRobotsTxtParser& parser, const char* value, const char* host) {
+    TString sitemapUrl; // passed to CheckSitemapUrl as its third argument
+    if (!CheckSitemapUrl(value, host, sitemapUrl)) {
+        rulesHandler->AddError(ERROR_SITEMAP_FORMAT, parser.GetLineNumber());
+        return false;
+    }
+
+    rulesHandler->AddSiteMap(sitemapUrl.data());
+    if (rulesHandler->AddRuleWithErrorCheck(robotstxtcfg::id_anybot, sitemapUrl.data(), 'S', parser)) {
+        return false;
+    }
+    return true;
+}
+
+// Handles a Clean-param directive. A value rejected by CheckAndNormCleanParam
+// is reported as ERROR_CLEAN_PARAM_FORMAT. An accepted one is stored with
+// AddCleanParam and added as a 'P' rule for id_anybot. Returns true only when
+// adding the rule fails.
+bool TRobotsTxtRulesHandlerBase::ProcessCleanParam(TRobotsTxtRulesHandlerBase* rulesHandler, TRobotsTxtParser& parser, TString& value) {
+    const bool wellFormed = CheckAndNormCleanParam(value);
+    if (!wellFormed) {
+        rulesHandler->AddError(ERROR_CLEAN_PARAM_FORMAT, parser.GetLineNumber());
+        return false;
+    }
+
+    rulesHandler->AddCleanParam(value.data());
+    if (rulesHandler->AddRuleWithErrorCheck(robotstxtcfg::id_anybot, value.data(), 'P', parser)) {
+        return false;
+    }
+    return true;
+}
+
+// Handles a Crawl-delay directive for every supported bot of the current
+// section. A bot that already has a non-negative delay triggers
+// ERROR_CRAWL_DELAY_MULTI and stops the loop. An unparsable value is reported
+// as ERROR_CRAWL_DELAY_FORMAT. Returns true only when adding the 'C' rule fails.
+bool TRobotsTxtRulesHandlerBase::ProcessCrawlDelay(
+    TRobotsTxtRulesHandlerBase* rulesHandler,
+    TRobotsTxtParser& parser,
+    const TBotIdSet& currentBotIds,
+    const TBotIdSet& isSupportedBot,
+    const char* value) {
+    for (ui32 botId : currentBotIds) {
+        if (isSupportedBot.contains(botId)) {
+            if (rulesHandler->BotIdToInfo[botId].CrawlDelay >= 0) {
+                rulesHandler->AddError(ERROR_CRAWL_DELAY_MULTI, parser.GetLineNumber());
+                break;
+            }
+
+            int parsedDelay = -1;
+            if (ParseCrawlDelay(value, parsedDelay)) {
+                rulesHandler->SetCrawlDelay(botId, parsedDelay);
+                if (!rulesHandler->AddRuleWithErrorCheck(botId, value, 'C', parser)) {
+                    return true;
+                }
+            } else {
+                rulesHandler->AddError(ERROR_CRAWL_DELAY_FORMAT, parser.GetLineNumber());
+            }
+        }
+    }
+    return false;
+}
+
+// Updates per-bot state for a User-agent line whose name is `value`.
+// For each bot in `currentBotIds`:
+//  - data collected so far is cleared when the bot is seen for the first time
+//    or when this name is longer than the longest one recorded for it;
+//  - the bot is marked as seen in `wasSupportedBot`;
+//  - the bot is enabled in `isSupportedBot` when the name is at least as long
+//    as the recorded maximum;
+//  - the recorded maximum name length is raised if needed.
+void TRobotsTxtRulesHandlerBase::ProcessUserAgent(
+    TRobotsTxtRulesHandlerBase* rulesHandler,
+    TRobotsTxtParser& parser,
+    const TBotIdSet& currentBotIds,
+    TBotIdSet& wasSupportedBot,
+    TBotIdSet& isSupportedBot,
+    TVector<ui32>& botIdToMaxAppropriateUserAgentNameLength,
+    const char* value)
+{
+    const ui32 nameLength = (ui32)strlen(value);
+
+    for (ui32 botId : currentBotIds) {
+        const ui32 recordedLength = botIdToMaxAppropriateUserAgentNameLength[botId];
+        const bool isLonger = nameLength > recordedLength;
+
+        if (isLonger || !wasSupportedBot.contains(botId)) {
+            ClearAllExceptCrossSection(parser, rulesHandler, botId);
+        }
+
+        wasSupportedBot.insert(botId);
+        if (nameLength >= recordedLength) {
+            // Allow multiple blocks for the same user agent
+            isSupportedBot.insert(botId);
+        }
+        botIdToMaxAppropriateUserAgentNameLength[botId] = Max(nameLength, recordedLength);
+    }
+}
+
+// Parses a whole robots.txt stream from `parser` into `rulesHandler`.
+//
+// The handler is cleared first. Records are then read one by one, and each
+// "field: value" pair is dispatched on its directive type. Host directives are
+// delegated to `hostHandler->OnHost`; `host` is forwarded to sitemap checking.
+//
+// Return value, as visible in this function:
+//  - 2 when adding a rule fails or when OptimizeSize() fails;
+//  - the non-zero value returned by `hostHandler->OnHost`, if any;
+//  - 1 after the whole input has been processed.
+template <class THostHandler>
+int TRobotsTxtRulesHandlerBase::ParseRules(TRobotsTxtParser& parser, TRobotsTxtRulesHandlerBase* rulesHandler, THostHandler* hostHandler, const char* host) {
+ rulesHandler->Clear();
+
+ // State that lives across all records of the file.
+ TBotIdSet wasSupportedBot;
+ TBotIdSet wasRuleForBot;
+ bool wasAsterisk = false;
+ TVector<int> nonRobotsLines;
+ TVector<ui32> botIdToMaxAppropriateUserAgentNameLength(robotstxtcfg::max_botid, 0);
+ // Substituted for an empty Disallow value below.
+ static char all[] = "/";
+ EDirectiveType prevType = USER_AGENT;
+ while (parser.HasRecord()) {
+ TRobotsTxtRulesRecord record = parser.NextRecord();
+ // State that is reset for every record.
+ bool wasUserAgent = false;
+ bool isRobotsRecordUseful = false;
+ TBotIdSet isSupportedBot;
+ TBotIdSet currentBotIds;
+ TString field;
+ TString value;
+ bool ruleBeforeUserAgent = false;
+ int ret = 0;
+ bool wasRule = false;
+ bool wasBlank = false;
+ while (record.NextPair(field, value, isRobotsRecordUseful && rulesHandler->IsHandlingErrors(), nonRobotsLines, &wasBlank)) {
+ CheckRobotsLines(rulesHandler, nonRobotsLines);
+ EDirectiveType type = NameToDirType(field.data());
+ // CheckEmptyRule may flip `type`; remember what was written in the file.
+ EDirectiveType typeBeforeChange = type;
+
+ // A User-agent line that follows a different directive or a blank line
+ // starts a new group of bots.
+ if ((prevType != type || wasBlank) && type == USER_AGENT) {
+ currentBotIds.clear();
+ }
+ prevType = type;
+
+ switch (type) {
+ case USER_AGENT:
+ // A User-agent after rules of a previous User-agent starts a new section.
+ if (wasUserAgent && wasRule) {
+ wasRule = false;
+ currentBotIds.clear();
+ isSupportedBot.clear();
+ }
+ wasUserAgent = true;
+ value.to_lower();
+ CheckAsterisk(rulesHandler, value.data(), parser.GetLineNumber(), wasAsterisk);
+ isRobotsRecordUseful = rulesHandler->CheckRobot(value.data(), currentBotIds, &botIdToMaxAppropriateUserAgentNameLength);
+ if (isRobotsRecordUseful)
+ ProcessUserAgent(rulesHandler, parser, currentBotIds, wasSupportedBot, isSupportedBot, botIdToMaxAppropriateUserAgentNameLength, value.data());
+ break;
+
+ case DISALLOW:
+ case ALLOW:
+ if (CheckWasUserAgent(rulesHandler, wasUserAgent, ruleBeforeUserAgent, wasRule, parser.GetLineNumber()))
+ break;
+ if (CheckRuleNotSlash(rulesHandler, value.data(), parser.GetLineNumber()))
+ break;
+ CheckRule(value.data(), parser.GetLineNumber(), rulesHandler);
+ if (!CheckSupportedBots(currentBotIds, wasRuleForBot, isSupportedBot)) {
+ break;
+ }
+ if (CheckEmptyRule(rulesHandler, value.data(), type, parser.GetLineNumber())) {
+ value = all;
+ // An empty Allow adds no rule. Note that `continue` resumes the NextPair
+ // loop, so the accepted-line bookkeeping after the switch is skipped too.
+ if (typeBeforeChange == ALLOW)
+ continue;
+ }
+
+ if (AddRules(rulesHandler, parser, value.data(), type == ALLOW ? 'A' : 'D', currentBotIds, isSupportedBot))
+ return 2;
+ break;
+
+ case HOST:
+ value.to_lower();
+ ret = hostHandler->OnHost(robotstxtcfg::id_anybot, parser, value.data(), rulesHandler);
+ if (ret)
+ return ret;
+ break;
+
+ case SITEMAP:
+ if (ProcessSitemap(rulesHandler, parser, value.data(), host))
+ return 2;
+ break;
+
+ case CLEAN_PARAM:
+ if (ProcessCleanParam(rulesHandler, parser, value))
+ return 2;
+ break;
+
+ case CRAWL_DELAY:
+ if (ProcessCrawlDelay(rulesHandler, parser, currentBotIds, isSupportedBot, value.data()))
+ return 2;
+ break;
+
+ default:
+ rulesHandler->AddError(WARNING_UNKNOWN_FIELD, parser.GetLineNumber());
+ break;
+ }
+ // Sitemap, Host and Clean-param are accepted regardless of the current section.
+ bool isCrossSection = type == SITEMAP || type == HOST || type == CLEAN_PARAM;
+ if (rulesHandler->IsHandlingErrors() && (isRobotsRecordUseful || isCrossSection))
+ rulesHandler->AddAcceptedLine(parser.GetLineNumber(), currentBotIds, isCrossSection);
+ }
+ }
+
+ // Finalize every bot that had a matching User-agent section.
+ for (auto botId : wasSupportedBot) {
+ rulesHandler->LoadedBotIds.insert(botId);
+ if (rulesHandler->IsBotIdSupported(botId))
+ rulesHandler->AfterParse(botId);
+ }
+
+ if (!rulesHandler->OptimizeSize()) {
+ return 2;
+ }
+
+ return 1;
+}
diff --git a/library/cpp/robots_txt/robots_txt_parser.cpp b/library/cpp/robots_txt/robots_txt_parser.cpp
new file mode 100644
index 0000000000..8e2fe6073d
--- /dev/null
+++ b/library/cpp/robots_txt/robots_txt_parser.cpp
@@ -0,0 +1,116 @@
+#include "robots_txt_parser.h"
+#include <util/generic/string.h>
+#include <util/stream/output.h>
+
+// Binds the parser to `inputStream` (kept by reference) and starts with line
+// number 0 and no pending carriage return.
+TRobotsTxtParser::TRobotsTxtParser(IInputStream& inputStream)
+ : InputStream(inputStream)
+ , LineNumber(0)
+ , IsLastSymbolCR(false)
+{
+}
+
+// Returns the number of lines ReadLine() has produced so far, i.e. the 1-based
+// number of the most recently read line (0 before the first read).
+int TRobotsTxtParser::GetLineNumber() {
+ return LineNumber;
+}
+
+// Reads the next line of the input into `Line` and returns a pointer to its
+// text, or nullptr when the input is exhausted.
+//
+// Lines may be terminated by "\n", "\r" or "\r\n". A lone '\r' ends the line
+// immediately; `IsLastSymbolCR` remembers it so that a directly following
+// '\n' is swallowed on the next call. A '\r' that directly follows another
+// '\r' terminates an empty line of its own, so blank CR-terminated lines are
+// seen by callers and line numbers stay accurate.
+//
+// A UTF-8 byte order mark is stripped from the first line, and everything
+// from the first '#' onward is dropped as a comment.
+const char* TRobotsTxtParser::ReadLine() {
+    Line = "";
+    char c;
+    bool lineTerminated = false;
+
+    if (IsLastSymbolCR) {
+        if (!InputStream.ReadChar(c))
+            return nullptr;
+        if (c == '\r') {
+            // "\r\r": the second CR ends an empty line. IsLastSymbolCR stays
+            // set because the last consumed symbol is again a CR.
+            lineTerminated = true;
+        } else if (c != '\n') {
+            Line.append(c);
+        }
+    }
+
+    bool hasMoreSymbols = true;
+    while (!lineTerminated) {
+        hasMoreSymbols = InputStream.ReadChar(c);
+        if (!hasMoreSymbols)
+            break;
+        if (c == '\r') {
+            IsLastSymbolCR = true;
+            break;
+        }
+        IsLastSymbolCR = false;
+        if (c == '\n')
+            break;
+        Line.append(c);
+    }
+    if (!hasMoreSymbols && Line.empty())
+        return nullptr;
+
+    // BOM UTF-8: EF BB BF
+    if (0 == LineNumber && Line.size() >= 3 && Line[0] == '\xEF' && Line[1] == '\xBB' && Line[2] == '\xBF')
+        Line = Line.substr(3, Line.size() - 3);
+
+    ++LineNumber;
+    // Compare against npos instead of narrowing the position to int.
+    const size_t commentPos = Line.find('#');
+    if (commentPos != TString::npos)
+        Line = Line.substr(0, commentPos);
+    return Line.data();
+}
+
+// Returns true when the NUL-terminated string `s` is empty or consists only
+// of whitespace characters.
+bool TRobotsTxtParser::IsBlankLine(const char* s) {
+    for (const char* p = s; *p; ++p) {
+        // isspace() is only defined for values representable as unsigned char
+        // (or EOF); plain char may be negative for non-ASCII bytes.
+        if (!isspace(static_cast<unsigned char>(*p)))
+            return false;
+    }
+    return true;
+}
+
+// Trims whitespace from both ends of the NUL-terminated string `s` in place.
+// Trailing whitespace is cut by writing a NUL terminator into the buffer;
+// the returned pointer addresses the first non-whitespace character inside
+// the same buffer (it may differ from `s`).
+char* TRobotsTxtParser::Trim(char* s) {
+    // isspace() requires an unsigned char value; plain char may be negative
+    // for non-ASCII bytes.
+    while (isspace(static_cast<unsigned char>(*s)))
+        ++s;
+    // `end` points one past the last kept character, so no pointer before the
+    // start of the string is ever formed, even when the string is empty.
+    char* end = s + strlen(s);
+    while (end > s && isspace(static_cast<unsigned char>(*(end - 1))))
+        --end;
+    *end = 0;
+    return s;
+}
+
+// A line counts as a robots.txt directive line when it contains a ':'.
+inline bool TRobotsTxtParser::IsRobotsLine(const char* s) {
+    const char* colon = strchr(s, ':');
+    return colon != nullptr;
+}
+
+// Reads lines until the current one contains a ':' (a directive line).
+// Returns true when such a line is available, false when the input ends first.
+bool TRobotsTxtParser::HasRecord() {
+    for (;;) {
+        if (IsRobotsLine(Line.data())) {
+            return true;
+        }
+        if (ReadLine() == nullptr) {
+            return false;
+        }
+    }
+}
+
+// Returns a record object bound to this parser. No input is consumed here;
+// the record reads lines through the parser when NextPair() is called.
+TRobotsTxtRulesRecord TRobotsTxtParser::NextRecord() {
+ return TRobotsTxtRulesRecord(*this);
+}
+
+// Stores a reference to `parser`; the parser must outlive the record.
+TRobotsTxtRulesRecord::TRobotsTxtRulesRecord(TRobotsTxtParser& parser)
+ : Parser(parser)
+{
+}
+
+// Extracts the next "field: value" pair from the parser.
+//
+// field          - receives the lower-cased, trimmed text before the first ':'.
+// value          - receives the trimmed text after the first ':'.
+// handleErrors   - when true, numbers of non-blank lines without ':' that are
+//                  skipped are appended to `nonRobotsLines`.
+// wasBlank       - optional; reset to false on entry and set to true when a
+//                  blank line is skipped while looking for the pair.
+//
+// Returns false (0) when the input ends before another pair is found.
+bool TRobotsTxtRulesRecord::NextPair(TString& field, TString& value, bool handleErrors, TVector<int>& nonRobotsLines, bool* wasBlank) {
+ if (wasBlank) {
+ *wasBlank = false;
+ }
+ // Skip forward until the parser's current line contains a ':'.
+ while (!Parser.IsRobotsLine(Parser.Line.data())) {
+ if (!Parser.ReadLine())
+ return 0;
+ if (Parser.IsBlankLine(Parser.Line.data())) {
+ if (wasBlank) {
+ *wasBlank = true;
+ }
+ continue;
+ }
+ if (handleErrors && !Parser.IsRobotsLine(Parser.Line.data()))
+ nonRobotsLines.push_back(Parser.GetLineNumber());
+ }
+
+ // Split the line in place: the ':' inside the parser's buffer is overwritten
+ // with NUL. IsRobotsLine() uses strchr() on the same buffer, so the consumed
+ // line no longer matches and the next call reads a fresh line.
+ char* s = strchr(Parser.Line.begin(), ':');
+ *s = 0;
+ char* p = s + 1;
+
+ // NOTE(review): strlwr is not a standard C/C++ function; presumably it is
+ // provided by the project's utility headers - confirm.
+ field = TRobotsTxtParser::Trim(strlwr(Parser.Line.begin()));
+ value = TRobotsTxtParser::Trim(p);
+ return 1;
+}
diff --git a/library/cpp/robots_txt/robots_txt_parser.h b/library/cpp/robots_txt/robots_txt_parser.h
new file mode 100644
index 0000000000..8032d0d20b
--- /dev/null
+++ b/library/cpp/robots_txt/robots_txt_parser.h
@@ -0,0 +1,38 @@
+#pragma once
+
+#include <algorithm>
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+#include <util/stream/input.h>
+
+class TRobotsTxtParser;
+
+// Lightweight view over a TRobotsTxtParser that yields "field: value" pairs.
+// It only holds a reference to the parser, which must outlive the record.
+class TRobotsTxtRulesRecord {
+private:
+ TRobotsTxtParser& Parser;
+
+public:
+ TRobotsTxtRulesRecord(TRobotsTxtParser& parser);
+ // Fetches the next pair; `wasBlank`, when given, is an output flag.
+ bool NextPair(TString& field, TString& value, bool handleErrors, TVector<int>& nonRobotsLines, bool* wasBlank = nullptr);
+};
+
+// Line-oriented reader for robots.txt content coming from an IInputStream.
+// TRobotsTxtRulesRecord is a friend and works directly on the private state.
+class TRobotsTxtParser {
+ friend class TRobotsTxtRulesRecord;
+
+private:
+ IInputStream& InputStream; // source of characters, held by reference
+ TString Line;              // text of the most recently read line
+ int LineNumber;            // count of lines read so far
+ bool IsLastSymbolCR;       // set when the last line ended with '\r'
+
+ const char* ReadLine();
+ static bool IsBlankLine(const char*);
+ static bool IsRobotsLine(const char*);
+
+public:
+ // Trims the given NUL-terminated buffer in place.
+ static char* Trim(char*);
+ TRobotsTxtParser(IInputStream& inputStream);
+ bool HasRecord();
+ TRobotsTxtRulesRecord NextRecord();
+ int GetLineNumber();
+};
diff --git a/library/cpp/robots_txt/robotstxtcfg.h b/library/cpp/robots_txt/robotstxtcfg.h
new file mode 100644
index 0000000000..5ca1682a0c
--- /dev/null
+++ b/library/cpp/robots_txt/robotstxtcfg.h
@@ -0,0 +1,3 @@
+#pragma once
+
+#include <library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.h>
diff --git a/library/cpp/robots_txt/robotstxtcfg/CMakeLists.darwin-x86_64.txt b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..09cfd4b3f1
--- /dev/null
+++ b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# Library target for the robots.txt bot configuration; its sources are
+# attached below with target_sources().
+add_library(cpp-robots_txt-robotstxtcfg)
+# PUBLIC: these dependencies are also propagated to targets linking this one.
+target_link_libraries(cpp-robots_txt-robotstxtcfg PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-case_insensitive_string
+)
+# Source paths are given relative to the top-level source directory.
+target_sources(cpp-robots_txt-robotstxtcfg PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp
+)
diff --git a/library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-aarch64.txt b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..6fe7e7a7ad
--- /dev/null
+++ b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,21 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# Library target for the robots.txt bot configuration; its sources are
+# attached below with target_sources().
+add_library(cpp-robots_txt-robotstxtcfg)
+# PUBLIC: these dependencies are also propagated to targets linking this one.
+# The Linux variants additionally link contrib-libs-linux-headers.
+target_link_libraries(cpp-robots_txt-robotstxtcfg PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-case_insensitive_string
+)
+# Source paths are given relative to the top-level source directory.
+target_sources(cpp-robots_txt-robotstxtcfg PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp
+)
diff --git a/library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-x86_64.txt b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..6fe7e7a7ad
--- /dev/null
+++ b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,21 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# Library target for the robots.txt bot configuration; its sources are
+# attached below with target_sources().
+add_library(cpp-robots_txt-robotstxtcfg)
+# PUBLIC: these dependencies are also propagated to targets linking this one.
+# The Linux variants additionally link contrib-libs-linux-headers.
+target_link_libraries(cpp-robots_txt-robotstxtcfg PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-case_insensitive_string
+)
+# Source paths are given relative to the top-level source directory.
+target_sources(cpp-robots_txt-robotstxtcfg PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp
+)
diff --git a/library/cpp/robots_txt/robotstxtcfg/CMakeLists.txt b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Include the platform-specific target definition that matches the system
+# name and processor. The Linux and Windows branches are taken only when
+# HAVE_CUDA is not set. There is no fallback branch: on any other
+# configuration nothing is included and no target is defined by this file.
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/library/cpp/robots_txt/robotstxtcfg/CMakeLists.windows-x86_64.txt b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..09cfd4b3f1
--- /dev/null
+++ b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# Library target for the robots.txt bot configuration; its sources are
+# attached below with target_sources().
+add_library(cpp-robots_txt-robotstxtcfg)
+# PUBLIC: these dependencies are also propagated to targets linking this one.
+target_link_libraries(cpp-robots_txt-robotstxtcfg PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-case_insensitive_string
+)
+# Source paths are given relative to the top-level source directory.
+target_sources(cpp-robots_txt-robotstxtcfg PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp
+)
diff --git a/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp b/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp
new file mode 100644
index 0000000000..aec668582c
--- /dev/null
+++ b/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp
@@ -0,0 +1,2 @@
+#include "bot_id_set.h"
+// header compile test
diff --git a/library/cpp/robots_txt/robotstxtcfg/bot_id_set.h b/library/cpp/robots_txt/robotstxtcfg/bot_id_set.h
new file mode 100644
index 0000000000..08aaa68a50
--- /dev/null
+++ b/library/cpp/robots_txt/robotstxtcfg/bot_id_set.h
@@ -0,0 +1,132 @@
+#pragma once
+
+#include "user_agents.h"
+
+#include <bitset>
+
+
+/// Simple bitset-based set of bot ids, meant to optimize memory and lookups.
+///
+/// One bit is kept per possible id in [0, robotstxtcfg::max_botid). Ids outside
+/// that range are never members: contains() returns false for them and
+/// insert()/remove() reject them.
+class TBotIdSet
+{
+public:
+    using TData = std::bitset<robotstxtcfg::max_botid>;
+
+    constexpr TBotIdSet() noexcept = default;
+    constexpr TBotIdSet(const TBotIdSet&) noexcept = default;
+    constexpr TBotIdSet(TBotIdSet&&) noexcept = default;
+    constexpr TBotIdSet& operator = (const TBotIdSet&) noexcept = default;
+    constexpr TBotIdSet& operator = (TBotIdSet&&) noexcept = default;
+
+    /// Builds a set from a list of ids; out-of-range ids are ignored.
+    TBotIdSet(std::initializer_list<ui32> botIds) {
+        for (auto id : botIds) {
+            insert(id);
+        }
+    }
+
+    /// Returns a set containing every id in [0, robotstxtcfg::max_botid).
+    static TBotIdSet All() noexcept {
+        TBotIdSet res;
+        res.Bots.set();
+        return res;
+    }
+
+    constexpr bool contains(ui32 botId) const noexcept {
+        return (botId < Bots.size()) && Bots[botId];
+    }
+
+    /// Adds `botId`; returns false when it is out of range or already present.
+    bool insert(ui32 botId) noexcept {
+        if (botId >= Bots.size() || Bots[botId]) {
+            return false;
+        }
+        Bots[botId] = true;
+        return true;
+    }
+
+    /// Removes `botId`; returns false when it is out of range or not present.
+    bool remove(ui32 botId) noexcept {
+        if (botId >= Bots.size() || !Bots[botId]) {
+            return false;
+        }
+        Bots[botId] = false;
+        return true;
+    }
+
+    void clear() noexcept {
+        Bots.reset();
+    }
+
+    /// Number of ids currently in the set.
+    size_t size() const noexcept {
+        return Bots.count();
+    }
+
+    bool empty() const noexcept {
+        return Bots.none();
+    }
+
+    bool operator==(const TBotIdSet& rhs) const noexcept = default;
+
+    /// Set intersection.
+    TBotIdSet operator&(TBotIdSet rhs) const noexcept {
+        rhs.Bots &= Bots;
+        return rhs;
+    }
+
+    /// Set union.
+    TBotIdSet operator|(TBotIdSet rhs) const noexcept {
+        rhs.Bots |= Bots;
+        return rhs;
+    }
+
+    /// Complement within [0, robotstxtcfg::max_botid).
+    TBotIdSet operator~() const noexcept {
+        TBotIdSet result;
+        result.Bots = ~Bots;
+        return result;
+    }
+
+    /// Forward iterator over the ids present in the set, in increasing order.
+    /// It refers to the owning set's bits and must not outlive the set.
+    class iterator
+    {
+    public:
+        auto operator * () const noexcept {
+            return BotId;
+        }
+
+        /// Advances to the next id present in the set, or to the end position
+        /// (BotId == Bots.size()) when there is none. Incrementing an end
+        /// iterator is a no-op.
+        iterator& operator ++ () noexcept {
+            if (BotId < Bots.size()) {
+                // The bound is re-checked before every bit access: indexing a
+                // std::bitset at size() is out of range, which the previous
+                // `Bots[++BotId]` form did when stepping past the last id.
+                do {
+                    ++BotId;
+                } while (BotId < Bots.size() && !Bots[BotId]);
+            }
+            return *this;
+        }
+
+        bool operator == (const iterator& rhs) const noexcept {
+            return (&Bots == &rhs.Bots) && (BotId == rhs.BotId);
+        }
+
+        bool operator != (const iterator& rhs) const noexcept {
+            return !(*this == rhs);
+        }
+
+    private:
+        friend class TBotIdSet;
+        /// Positions the iterator at the first present id >= `botId`.
+        iterator(const TData& bots, ui32 botId)
+            : Bots(bots)
+            , BotId(botId)
+        {
+            while (BotId < Bots.size() && !Bots[BotId]) {
+                ++BotId;
+            }
+        }
+
+    private:
+        const TData& Bots;
+        ui32 BotId;
+    };
+
+    iterator begin() const noexcept {
+        return {Bots, robotstxtcfg::id_anybot};
+    }
+
+    iterator end() const noexcept {
+        return {Bots, robotstxtcfg::max_botid};
+    }
+
+private:
+    TData Bots {};
+};
diff --git a/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp b/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp
new file mode 100644
index 0000000000..c5652b81c5
--- /dev/null
+++ b/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp
@@ -0,0 +1,2 @@
+#include "robotstxtcfg.h"
+// header compile test
diff --git a/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.h b/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.h
new file mode 100644
index 0000000000..2cf9430d7c
--- /dev/null
+++ b/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include "bot_id_set.h"
+
+
+namespace robotstxtcfg {
+
+// Predefined bot id sets. Being `static const` objects in a header, each
+// translation unit that includes this file gets its own copy.
+// defaultSupportedBotIds holds only id_defbot; allSupportedBotIds holds every id.
+static const TBotIdSet defaultSupportedBotIds = {id_defbot};
+static const TBotIdSet allSupportedBotIds = TBotIdSet::All();
+
+} // namespace robotstxtcfg
diff --git a/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp b/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp
new file mode 100644
index 0000000000..60b353a427
--- /dev/null
+++ b/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp
@@ -0,0 +1,2 @@
+#include "user_agents.h"
+// header compile test
diff --git a/library/cpp/robots_txt/robotstxtcfg/user_agents.h b/library/cpp/robots_txt/robotstxtcfg/user_agents.h
new file mode 100644
index 0000000000..59245d07cb
--- /dev/null
+++ b/library/cpp/robots_txt/robotstxtcfg/user_agents.h
@@ -0,0 +1,303 @@
+#pragma once
+
+#include <library/cpp/case_insensitive_string/case_insensitive_string.h>
+
+
+namespace robotstxtcfg {
+ // robots.txt agents and identifiers
+
+ // Numeric identifiers of the known robots. The values are consecutive,
+ // starting at 0 for the wildcard bot; `max_botid` is one past the last id
+ // and is used elsewhere as the size of per-bot containers.
+ // NOTE(review): the order presumably has to match BotInfoArr - confirm.
+ enum EBots : ui32 {
+ id_anybot = 0,
+ id_yandexbot = 1,
+ id_yandexmediabot = 2,
+ id_yandeximagesbot = 3,
+ id_googlebot = 4,
+ id_yandexbotmirr = 5,
+ id_yahooslurp = 6,
+ id_msnbot = 7,
+ id_yandexcatalogbot = 8,
+ id_yandexdirectbot = 9,
+ id_yandexblogsbot = 10,
+ id_yandexnewsbot = 11,
+ id_yandexpagechk = 12,
+ id_yandexmetrikabot = 13,
+ id_yandexbrowser = 14,
+ id_yandexmarketbot = 15,
+ id_yandexcalendarbot = 16,
+ id_yandexwebmasterbot = 17,
+ id_yandexvideobot = 18,
+ id_yandeximageresizerbot = 19,
+ id_yandexadnetbot = 20,
+ id_yandexpartnerbot = 21,
+ id_yandexdirectdbot = 22,
+ id_yandextravelbot = 23,
+ id_yandexmobilebot = 24,
+ id_yandexrcabot = 25,
+ id_yandexdirectdynbot = 26,
+ id_yandexmobilebot_ed = 27,
+ id_yandexaccessibilitybot = 28,
+ id_baidubot = 29,
+ id_yandexscreenshotbot = 30,
+ id_yandexmetrikayabs = 31,
+ id_yandexvideoparserbot = 32,
+ id_yandexnewsbot4 = 33,
+ id_yandexmarketbot2 = 34,
+ id_yandexmedianabot = 35,
+ id_yandexsearchshopbot = 36,
+ id_yandexontodbbot = 37,
+ id_yandexontodbapibot = 38,
+ id_yandexampbot = 39,
+ id_yandexvideohosting = 40,
+ id_yandexmediaselling = 41,
+ id_yandexverticals = 42,
+ id_yandexturbobot = 43,
+ id_yandexzenbot = 44,
+ id_yandextrackerbot = 45,
+ id_yandexmetrikabot4 = 46,
+ id_yandexmobilescreenshotbot = 47,
+ id_yandexfaviconsbot = 48,
+ id_yandexrenderresourcesbot = 49,
+ id_yandexactivity = 50,
+ max_botid
+ };
+
+ static const ui32 id_defbot = id_yandexbot;
+
+ // Static description of one robot. Only the first two fields are required
+ // in an initializer; the rest default to empty / false.
+ struct TBotInfo {
+ TCaseInsensitiveStringBuf ReqPrefix; // case-insensitive name prefix, e.g. "Yandex"
+ TCaseInsensitiveStringBuf FullName;  // case-insensitive full name, e.g. "YandexBot/3.0"
+ TStringBuf FromField = {};           // presumably the HTTP "From" value - TODO confirm
+ TStringBuf UserAgent = {};           // User-Agent string of the robot
+ TStringBuf RotorUserAgent = {};      // alternative User-Agent string; usage not visible here
+ bool ExplicitDisallow = false;       // NOTE(review): semantics not visible in this file - confirm
+ };
+
+ static constexpr TStringBuf UserAgentFrom("support@search.yandex.ru");
+
+ static constexpr TBotInfo BotInfoArr[] = {
+ {"*", "*"},
+ {"Yandex", "YandexBot/3.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"Yandex", "YandexMedia/3.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexMedia/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexMedia/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"Yandex", "YandexImages/3.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"Google", "GoogleBot"},
+ {"Yandex", "YandexBot/3.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexBot/3.0; MirrorDetector; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexBot/3.0; MirrorDetector; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"Slurp", "Slurp"},
+ {"msn", "msnbot"},
+ {"Yandex", "YandexCatalog/3.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexCatalog/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexCatalog/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"YaDirectFetcher", "YaDirectFetcher/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YaDirectFetcher/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YaDirectFetcher/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+
+ {"Yandex", "YandexBlogs/0.99", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexBlogs/0.99; robot; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexBlogs/0.99; robot; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"Yandex", "YandexNews/3.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexNews/3.0; robot; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexNews/3.0; robot; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"Yandex", "YandexPagechecker/2.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexPagechecker/2.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexPagechecker/2.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"Yandex", "YandexMetrika/3.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexMetrika/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexMetrika/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"Yandex", "YandexBrowser/1.0", UserAgentFrom,
+ "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/536.5 (KHTML, like Gecko) YaBrowser/1.0.1084.5402 Chrome/19.0.1084.5409 Safari/536.5",
+ "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/536.5 (KHTML, like Gecko) YaBrowser/1.0.1084.5402 Chrome/19.0.1084.5409 Safari/536.5",
+ false},
+ {"Yandex", "YandexMarket/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexMarket/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexMarket/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"YandexCalendar", "YandexCalendar/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexCalendar/1.0 +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexCalendar/1.0 +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"Yandex", "YandexWebmaster/2.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexWebmaster/2.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexWebmaster/2.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"Yandex", "YandexVideo/3.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexVideo/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexVideo/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"Yandex", "YandexImageResizer/2.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexImageResizer/2.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexImageResizer/2.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+
+ {"YandexDirect", "YandexDirect/3.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexDirect/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexDirect/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexPartner", "YandexPartner/3.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexPartner/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexPartner/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YaDirectFetcher", "YaDirectFetcher/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YaDirectFetcher/1.0; Dyatel; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YaDirectFetcher/1.0; Dyatel; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"Yandex", "YandexTravel/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexTravel/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexTravel/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"Yandex", "YandexBot/3.0", UserAgentFrom,
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexBot/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexBot/3.0; +http://yandex.com/bots)",
+ false},
+ {"YandexRCA", "YandexRCA/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexRCA/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexRCA/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexDirectDyn", "YandexDirectDyn/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexDirectDyn/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexDirectDyn/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexMobileBot", "YandexMobileBot/3.0", UserAgentFrom,
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 15_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.4 Mobile/15E148 Safari/604.1 (compatible; YandexMobileBot/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 15_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.4 Mobile/15E148 Safari/604.1 (compatible; YandexMobileBot/3.0; +http://yandex.com/bots)",
+ true},
+ {"YandexAccessibilityBot", "YandexAccessibilityBot/3.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexAccessibilityBot/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexAccessibilityBot/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"Baidu", "Baiduspider"},
+
+ {"YandexScreenshotBot", "YandexScreenshotBot/3.0", UserAgentFrom,
+ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36 (compatible; YandexScreenshotBot/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36 (compatible; YandexScreenshotBot/3.0; +http://yandex.com/bots)",
+ true},
+ {"YandexMetrika", "YandexMetrika/2.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexMetrika/2.0; +http://yandex.com/bots yabs01)",
+ "Mozilla/5.0 (compatible; YandexMetrika/2.0; +http://yandex.com/bots yabs01) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexVideoParser", "YandexVideoParser/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexVideoParser/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexVideoParser/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"Yandex", "YandexNews/4.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexNews/4.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexNews/4.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexMarket", "YandexMarket/2.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexMarket/2.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexMarket/2.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexMedianaBot", "YandexMedianaBot/1.0", UserAgentFrom,
+ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36 (compatible; YandexMedianaBot/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36 (compatible; YandexMedianaBot/1.0; +http://yandex.com/bots)",
+ true},
+ {"YandexSearchShop", "YandexSearchShop/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexSearchShop/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexSearchShop/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"Yandex", "YandexOntoDB/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexOntoDB/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexOntoDB/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"YandexOntoDBAPI", "YandexOntoDBAPI/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexOntoDBAPI/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexOntoDBAPI/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"Yandex-AMPHTML", "Yandex-AMPHTML", UserAgentFrom,
+ "Mozilla/5.0 (compatible; Yandex-AMPHTML; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; Yandex-AMPHTML; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+
+ {"YandexVideoHosting", "YandexVideoHosting/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexVideoHosting/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexVideoHosting/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexMediaSelling", "YandexMediaSelling/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexMediaSelling/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexMediaSelling/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexVerticals", "YandexVerticals/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexVerticals/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexVerticals/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexTurbo", "YandexTurbo/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexTurbo/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexTurbo/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexZenRss", "YandexZenRss/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexZenRss/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexZenRss/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexTracker", "YandexTracker/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexTracker/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexTracker/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexMetrika", "YandexMetrika/4.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexMetrika/4.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexMetrika/4.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexMobileScreenShotBot", "YandexMobileScreenShotBot/1.0", UserAgentFrom,
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/11.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexBot/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/11.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexBot/3.0; +http://yandex.com/bots)",
+ true},
+ {"YandexFavicons", "YandexFavicons/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexFavicons/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexFavicons/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexRenderResourcesBot", "YandexRenderResourcesBot/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexRenderResourcesBot/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexRenderResourcesBot/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexActivity", "YandexActivity/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexActivity; robot; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexActivity; robot; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true}
+ };
+
+ // Compile-time guard: BotInfoArr must contain exactly max_botid entries.
+ static_assert(std::size(BotInfoArr) == max_botid);
+
+ // Field accessors for BotInfoArr. None of them range-checks botId.
+ constexpr auto GetReqPrefix(ui32 botId) {
+ return BotInfoArr[botId].ReqPrefix;
+ }
+
+ constexpr auto GetFullName(ui32 botId) {
+ return BotInfoArr[botId].FullName;
+ }
+
+ constexpr auto GetFromField(ui32 botId) {
+ return BotInfoArr[botId].FromField;
+ }
+
+ constexpr auto GetUserAgent(ui32 botId) {
+ return BotInfoArr[botId].UserAgent;
+ }
+
+ constexpr auto GetRotorUserAgent(ui32 botId) {
+ return BotInfoArr[botId].RotorUserAgent;
+ }
+
+ constexpr bool IsExplicitDisallow(ui32 botId) {
+ return BotInfoArr[botId].ExplicitDisallow;
+ }
+
+ // A bot id is treated as a Yandex bot when its table entry has a non-empty UserAgent.
+ // NOTE(review): entries initialised with only two values (e.g. "Baidu") presumably
+ // leave UserAgent empty - confirm against the struct definition.
+ constexpr bool IsYandexBotId(ui32 botId) {
+ return !BotInfoArr[botId].UserAgent.empty();
+ }
+
+} // namespace robotstxtcfg
diff --git a/library/cpp/robots_txt/robotstxtcfg/ya.make b/library/cpp/robots_txt/robotstxtcfg/ya.make
new file mode 100644
index 0000000000..61c731be42
--- /dev/null
+++ b/library/cpp/robots_txt/robotstxtcfg/ya.make
@@ -0,0 +1,13 @@
+# Library target for robotstxtcfg: three sources and one dependency on
+# library/cpp/case_insensitive_string.
+LIBRARY()
+
+SRCS(
+ bot_id_set.cpp
+ robotstxtcfg.cpp
+ user_agents.cpp
+)
+
+PEERDIR(
+ library/cpp/case_insensitive_string
+)
+
+END()
diff --git a/library/cpp/robots_txt/rules_handler.cpp b/library/cpp/robots_txt/rules_handler.cpp
new file mode 100644
index 0000000000..4297db9d21
--- /dev/null
+++ b/library/cpp/robots_txt/rules_handler.cpp
@@ -0,0 +1,514 @@
+#include "robots_txt.h"
+#include "constants.h"
+
+#include <library/cpp/uri/http_url.h>
+#include <library/cpp/charset/ci_string.h>
+#include <library/cpp/string_utils/url/url.h>
+#include <util/system/maxlen.h>
+#include <util/generic/yexception.h>
+#include <util/generic/algorithm.h>
+
+
+namespace {
+
+// Copies every id from the TSet-based collection into a freshly built TBotIdSet.
+TBotIdSet ConvertBotIdSet(const TSet<ui32>& botIds) noexcept {
+    TBotIdSet converted;
+    for (auto it = botIds.begin(); it != botIds.end(); ++it) {
+        converted.insert(*it);
+    }
+    return converted;
+}
+
+} // namespace
+
+// Stores the [begin, end) bounds that the other iterator methods walk.
+TRobotsTxtRulesIterator::TRobotsTxtRulesIterator(const char* begin, const char* end)
+ : Begin(begin)
+ , End(end)
+{
+}
+
+// Advances the cursor: first skips the remainder of the current NUL-terminated
+// record, then skips every byte that is not a letter (the NUL included), so that
+// Begin stops either on the next letter or on End.
+void TRobotsTxtRulesIterator::Next() {
+    while (Begin < End && *Begin)
+        ++Begin;
+    // <cctype> classifiers are undefined for negative arguments; go through
+    // unsigned char so bytes >= 0x80 are safe where plain char is signed.
+    while (Begin < End && !isalpha(static_cast<unsigned char>(*Begin)))
+        ++Begin;
+}
+
+// True while the cursor has not reached End.
+bool TRobotsTxtRulesIterator::HasRule() const {
+ return Begin < End;
+}
+
+// Returns the text that follows the byte under the cursor; that first byte is the
+// one GetRuleType() decodes as the directive type.
+const char* TRobotsTxtRulesIterator::GetRule() const {
+ return Begin + 1;
+}
+
+// Returns the current rule text as a TString. For records whose type byte is 'a'
+// or 'd' the text is adjusted: a trailing '*' is dropped, otherwise '$' is
+// appended. Records of any other type are returned unchanged.
+TString TRobotsTxtRulesIterator::GetInitialRule() const {
+    const char* text = Begin + 1;
+    TStringBuf rule(text, strlen(text));
+
+    const char kind = *Begin;
+    if (kind != 'a' && kind != 'd') {
+        return TString(rule);
+    }
+
+    if (rule.EndsWith('*')) {
+        return TString(rule.Chop(1));
+    }
+    return TString::Join(rule, '$');
+}
+
+// Decodes the directive type from the byte the cursor currently points at.
+EDirectiveType TRobotsTxtRulesIterator::GetRuleType() const {
+ return CharToDirType(*Begin);
+}
+
+// Maps a one-letter directive code (case-insensitive) to its EDirectiveType:
+// A - ALLOW, C - CRAWL_DELAY, D - DISALLOW, H - HOST, P - CLEAN_PARAM, S - SITEMAP.
+// Any other byte yields UNKNOWN.
+EDirectiveType TRobotsTxtRulesIterator::CharToDirType(char ch) {
+    // toupper() is undefined for negative arguments, so convert through
+    // unsigned char before calling it.
+    switch (toupper(static_cast<unsigned char>(ch))) {
+        case 'A':
+            return ALLOW;
+        case 'C':
+            return CRAWL_DELAY;
+        case 'D':
+            return DISALLOW;
+        case 'H':
+            return HOST;
+        case 'P':
+            return CLEAN_PARAM;
+        case 'S':
+            return SITEMAP;
+    }
+    return UNKNOWN;
+}
+
+// Main constructor.
+//   supportedBotIds   - bots this handler keeps data for; must not be empty
+//                       (checked with Y_ENSURE).
+//   robotsMaxSize     - values <= 0 are replaced with robots_max.
+//   maxRulesNumber    - values <= 0 are replaced with max_rules_count.
+//   saveDataForAnyBot - stored as SaveDataForAnyBot.
+// BotIdToInfo is created with max_botid elements and error handling starts disabled.
+TRobotsTxtRulesHandlerBase::TRobotsTxtRulesHandlerBase(
+ TBotIdSet supportedBotIds,
+ int robotsMaxSize,
+ int maxRulesNumber,
+ bool saveDataForAnyBot)
+ : HandleErrors(false)
+ , SiteMaps()
+ , CleanParams()
+ , HostDirective("")
+ , Errors()
+ , AcceptedLines()
+ , CrossSectionAcceptedLines()
+ , BotIdToInfo(robotstxtcfg::max_botid)
+ , RobotsMaxSize(robotsMaxSize)
+ , MaxRulesNumber(maxRulesNumber)
+ , SaveDataForAnyBot(saveDataForAnyBot)
+ , SupportedBotIds(supportedBotIds)
+{
+ Y_ENSURE(!supportedBotIds.empty());
+
+ // Non-positive limits mean "use the library default".
+ if (RobotsMaxSize <= 0)
+ RobotsMaxSize = robots_max;
+ if (MaxRulesNumber <= 0)
+ MaxRulesNumber = max_rules_count;
+
+ ResetOptimized();
+}
+
+// Convenience overload: converts the TSet of ids with ConvertBotIdSet() and
+// delegates to the TBotIdSet-based constructor.
+TRobotsTxtRulesHandlerBase::TRobotsTxtRulesHandlerBase(
+ const TSet<ui32>& supportedBotIds,
+ int robotsMaxSize,
+ int maxRulesNumber,
+ bool saveDataForAnyBot)
+ : TRobotsTxtRulesHandlerBase(ConvertBotIdSet(supportedBotIds), robotsMaxSize, maxRulesNumber, saveDataForAnyBot)
+{}
+
+// Defaulted destructor, defined out of line.
+TRobotsTxtRulesHandlerBase::~TRobotsTxtRulesHandlerBase() = default;
+
+// Throws yexception unless botId is below max_botid and supported by this handler.
+void TRobotsTxtRulesHandlerBase::CheckBotIdValidity(const ui32 botId) const {
+    const bool usable = botId < robotstxtcfg::max_botid && IsBotIdSupported(botId);
+    if (!usable) {
+        ythrow yexception() << "robots.txt parser requested for invalid or unsupported botId = " << botId << Endl;
+    }
+}
+
+// Returns the crawl delay stored for botId, or the one stored for id_anybot when
+// GetMappedBotId() yields nothing for this bot. If realInfo is non-null it is set
+// to whether the bot's own entry was used.
+int TRobotsTxtRulesHandlerBase::GetCrawlDelay(const ui32 botId, bool* realInfo) const {
+    const auto mapped = GetMappedBotId(botId, false);
+    const bool hasOwnEntry = bool(mapped);
+    if (realInfo) {
+        *realInfo = hasOwnEntry;
+    }
+    const auto effectiveId = mapped.GetOrElse(robotstxtcfg::id_anybot);
+    return BotIdToInfo[effectiveId].CrawlDelay;
+}
+
+// Computes the smallest crawl delay among Yandex bots that are supported, not
+// fully disallowed, and have their own crawl-delay entry.
+// Returns -1 when at least one such bot has no explicit delay (-1) and
+// defaultCrawlDelay is smaller than the minimum found. When no bot contributed a
+// value, the delay stored for id_anybot is returned instead.
+int TRobotsTxtRulesHandlerBase::GetMinCrawlDelay(int defaultCrawlDelay) const {
+    int minDelay = INT_MAX;
+    bool sawUnsetDelay = false;
+
+    for (ui32 id = 0; id < robotstxtcfg::max_botid; ++id) {
+        if (!robotstxtcfg::IsYandexBotId(id) || !IsBotIdSupported(id) || IsDisallowAll(id)) {
+            continue;
+        }
+
+        bool hasOwnEntry;
+        const int delay = GetCrawlDelay(id, &hasOwnEntry);
+        if (!hasOwnEntry) {
+            continue;
+        }
+
+        if (delay == -1) {
+            sawUnsetDelay = true;
+        } else {
+            minDelay = Min(minDelay, delay);
+        }
+    }
+
+    if (sawUnsetDelay && defaultCrawlDelay < minDelay) {
+        return -1;
+    }
+
+    if (minDelay != INT_MAX) {
+        return minDelay;
+    }
+    return GetCrawlDelay(robotstxtcfg::id_anybot);
+}
+
+// Stores crawlDelay for botId; CheckBotIdValidity() throws for an invalid or
+// unsupported id.
+void TRobotsTxtRulesHandlerBase::SetCrawlDelay(const ui32 botId, int crawlDelay) {
+ CheckBotIdValidity(botId);
+ BotIdToInfo[botId].CrawlDelay = crawlDelay;
+}
+
+// Returns a copy of the collected sitemap entries as a vector.
+const TVector<TString> TRobotsTxtRulesHandlerBase::GetSiteMaps() const {
+ return TVector<TString>(SiteMaps.begin(), SiteMaps.end());
+}
+
+// Records one sitemap entry.
+void TRobotsTxtRulesHandlerBase::AddSiteMap(const char* sitemap) {
+ SiteMaps.insert(sitemap);
+}
+
+// Returns a copy of the collected clean-param entries as a vector.
+const TVector<TString> TRobotsTxtRulesHandlerBase::GetCleanParams() const {
+ return TVector<TString>(CleanParams.begin(), CleanParams.end());
+}
+
+// Records one clean-param entry.
+void TRobotsTxtRulesHandlerBase::AddCleanParam(const char* cleanParam) {
+ CleanParams.insert(cleanParam);
+}
+
+// Returns the stored Host directive value (empty string when none was set).
+const TString& TRobotsTxtRulesHandlerBase::GetHostDirective() const {
+ return HostDirective;
+}
+
+// Overwrites the stored Host directive value.
+void TRobotsTxtRulesHandlerBase::SetHostDirective(const char* hostDirective) {
+ HostDirective = hostDirective;
+}
+
+// Returns the list of (error type, line) pairs collected by AddError().
+const TRobotsTxtRulesHandlerBase::TErrorVector& TRobotsTxtRulesHandlerBase::GetErrors() const {
+ return Errors;
+}
+
+// Collects the accepted line numbers relevant to botId, sorted ascending:
+// every cross-section line, plus the lines recorded for botId itself or, when
+// botId has none, the lines recorded for id_anybot.
+TVector<int> TRobotsTxtRulesHandlerBase::GetAcceptedLines(const ui32 botId) const {
+    TVector<int> lines;
+    for (const auto& crossLine : CrossSectionAcceptedLines) {
+        lines.push_back(crossLine);
+    }
+
+    // Decide once whose lines to take: the bot's own, or the any-bot fallback.
+    bool botHasOwnLines = false;
+    for (const auto& entry : AcceptedLines) {
+        if (entry.first == botId) {
+            botHasOwnLines = true;
+            break;
+        }
+    }
+
+    for (const auto& entry : AcceptedLines) {
+        const bool wanted = botHasOwnLines
+            ? entry.first == botId
+            : entry.first == robotstxtcfg::id_anybot;
+        if (wanted) {
+            lines.push_back(entry.second);
+        }
+    }
+
+    Sort(lines.begin(), lines.end());
+
+    return lines;
+}
+
+// Records an accepted source line. A cross-section line is stored once in
+// CrossSectionAcceptedLines; any other line is stored once per bot id in botIds.
+void TRobotsTxtRulesHandlerBase::AddAcceptedLine(ui32 line, const TBotIdSet& botIds, bool isCrossSection) {
+    if (!isCrossSection) {
+        for (auto id : botIds) {
+            AcceptedLines.push_back(TBotIdAcceptedLine(id, line));
+        }
+    } else {
+        CrossSectionAcceptedLines.push_back(line);
+    }
+}
+
+// Turns error collection on or off; AddError() is a no-op while it is off.
+void TRobotsTxtRulesHandlerBase::SetErrorsHandling(bool handleErrors) {
+ HandleErrors = handleErrors;
+}
+
+// Reports whether error collection is currently enabled.
+bool TRobotsTxtRulesHandlerBase::IsHandlingErrors() const {
+ return HandleErrors;
+}
+
+// Maps a directive name to its EDirectiveType. The comparison is exact
+// (strcmp against lower-case names); anything unrecognised yields UNKNOWN.
+EDirectiveType TRobotsTxtRulesHandlerBase::NameToDirType(const char* d) {
+    struct TKnownDirective {
+        const char* Name;
+        EDirectiveType Type;
+    };
+    static const TKnownDirective known[] = {
+        {"disallow", DISALLOW},
+        {"allow", ALLOW},
+        {"user-agent", USER_AGENT},
+        {"host", HOST},
+        {"sitemap", SITEMAP},
+        {"clean-param", CLEAN_PARAM},
+        {"crawl-delay", CRAWL_DELAY},
+    };
+
+    for (const TKnownDirective& entry : known) {
+        if (strcmp(entry.Name, d) == 0) {
+            return entry.Type;
+        }
+    }
+    return UNKNOWN;
+}
+
+// Returns the display name of a directive type; values not covered by the switch
+// fall back to "Unknown".
+const char* TRobotsTxtRulesHandlerBase::DirTypeToName(EDirectiveType t) {
+    static const char* name[] = {"Allow", "Crawl-Delay", "Disallow", "Host", "Clean-Param", "Sitemap", "User-Agent", "Unknown"};
+
+    // Index of the "Unknown" entry is the default.
+    size_t index = 7;
+    switch (t) {
+        case ALLOW:
+            index = 0;
+            break;
+        case CRAWL_DELAY:
+            index = 1;
+            break;
+        case DISALLOW:
+            index = 2;
+            break;
+        case HOST:
+            index = 3;
+            break;
+        case CLEAN_PARAM:
+            index = 4;
+            break;
+        case SITEMAP:
+            index = 5;
+            break;
+        case USER_AGENT:
+            index = 6;
+            break;
+        case UNKNOWN:
+            index = 7;
+            break;
+    }
+    return name[index];
+}
+
+// Adds to botIds every supported bot that the given User-agent value addresses.
+// A bot matches when the agent string starts with the bot's required prefix and
+// is itself a prefix of the bot's full name (both case-insensitive). When
+// botIdToMaxAppropriateUserAgentNameLength is supplied, a bot is skipped if the
+// agent string is shorter than the length recorded for that bot.
+// Returns true when botIds is non-empty afterwards.
+bool TRobotsTxtRulesHandlerBase::CheckRobot(
+    const char* userAgent,
+    TBotIdSet& botIds,
+    const TVector<ui32>* botIdToMaxAppropriateUserAgentNameLength) const
+{
+    TCaseInsensitiveStringBuf agent(userAgent);
+
+    for (size_t id = 0; id < robotstxtcfg::max_botid; ++id) {
+        if (!IsBotIdSupported(id)) {
+            continue;
+        }
+        if (!agent.StartsWith(robotstxtcfg::GetReqPrefix(id))) {
+            continue;
+        }
+        if (!robotstxtcfg::GetFullName(id).StartsWith(agent)) {
+            continue;
+        }
+        if (botIdToMaxAppropriateUserAgentNameLength &&
+            agent.size() < (*botIdToMaxAppropriateUserAgentNameLength)[id]) {
+            continue;
+        }
+        botIds.insert(id);
+    }
+
+    return !botIds.empty();
+}
+
+// Inspects a rule value and records findings on rulesHandler.
+// Does nothing and returns 0 when the handler is not collecting errors. Otherwise:
+//   - ERROR_RULE_HUGE is added when the value is longer than max_rule_length;
+//   - WARNING_SUSPECT_SYMBOL is added when a non-alphanumeric character outside
+//     "/_?=.-*%&~[]:;@" occurs (a '$' in the last position is allowed);
+//   - WARNING_UPPER_REGISTER is added when an upper-case letter occurs.
+// Returns non-zero iff one of the two warnings was raised; ERROR_RULE_HUGE does
+// not influence the result.
+int TRobotsTxtRulesHandlerBase::CheckRule(const char* value, int line, TRobotsTxtRulesHandlerBase* rulesHandler) {
+    if (!rulesHandler->IsHandlingErrors())
+        return 0;
+
+    if (auto len = strlen(value); len > max_rule_length) {
+        rulesHandler->AddError(ERROR_RULE_HUGE, line);
+    }
+
+    bool upper = false, suspect = false;
+    for (const char* r = value; *r; ++r) {
+        // <cctype> classifiers are undefined for negative arguments, so classify
+        // the byte as unsigned char (matters for bytes >= 0x80 with signed char).
+        const unsigned char ch = static_cast<unsigned char>(*r);
+        if (!upper && isupper(ch))
+            upper = true;
+        if (!suspect && !isalnum(ch) && !strchr("/_?=.-*%&~[]:;@", *r) && (*(r + 1) || *r != '$'))
+            suspect = true;
+    }
+    if (suspect)
+        rulesHandler->AddError(WARNING_SUSPECT_SYMBOL, line);
+    if (upper)
+        rulesHandler->AddError(WARNING_UPPER_REGISTER, line);
+    return suspect || upper;
+}
+
+void TRobotsTxtRulesHandlerBase::AddError(EFormatErrorType type, int line) {
+ if (!HandleErrors)
+ return;
+ Errors.push_back(std::make_pair(type, line));
+}
+
+// Restores the identity mapping in OptimizedBotIdToStoredBotId:
+// by default, every bot maps to itself.
+void TRobotsTxtRulesHandlerBase::ResetOptimized() noexcept {
+    ui32 id = 0;
+    for (auto& storedId : OptimizedBotIdToStoredBotId) {
+        storedId = id;
+        ++id;
+    }
+}
+
+// Drops all parsed state: sitemaps, clean-params, the Host directive, every
+// bot's crawl delay (reset to -1) and the set of loaded bot ids. Accepted lines
+// and errors are cleared only while error handling is enabled.
+void TRobotsTxtRulesHandlerBase::Clear() {
+    SiteMaps.clear();
+    CleanParams.clear();
+    HostDirective = "";
+
+    if (HandleErrors) {
+        AcceptedLines.clear();
+        CrossSectionAcceptedLines.clear();
+        Errors.clear();
+    }
+
+    for (auto& info : BotIdToInfo) {
+        info.CrawlDelay = -1;
+    }
+
+    LoadedBotIds.clear();
+}
+
+// Resets the per-bot state kept in this base class for botId: the crawl delay
+// goes back to -1 and the bot's accepted lines are removed. Throws via
+// CheckBotIdValidity() for an invalid or unsupported id.
+void TRobotsTxtRulesHandlerBase::ClearInternal(const ui32 botId) {
+    CheckBotIdValidity(botId);
+    BotIdToInfo[botId].CrawlDelay = -1;
+
+    // Rebuild the list without this bot's entries, then swap it in.
+    TVector<TBotIdAcceptedLine> kept;
+    for (const auto& entry : AcceptedLines) {
+        if (entry.first == botId) {
+            continue;
+        }
+        kept.push_back(entry);
+    }
+
+    AcceptedLines.swap(kept);
+}
+
+// Validates a Host directive value. "http://" is prepended when
+// GetHttpPrefixSize() finds no scheme prefix; the result must parse with
+// THttpURL::FeaturesRobot and have a non-empty host field.
+// Returns non-zero when valid, 0 otherwise.
+int TRobotsTxtRulesHandlerBase::CheckHost(const char* host) {
+    TString candidate = host;
+    if (GetHttpPrefixSize(candidate) == 0) {
+        candidate = TString("http://") + candidate;
+    }
+
+    THttpURL parsed;
+    if (parsed.Parse(candidate.data(), THttpURL::FeaturesRobot) != THttpURL::ParsedOK) {
+        return false;
+    }
+    return parsed.GetField(THttpURL::FieldHost) != TString("");
+}
+
+// Validates a Sitemap URL. A URL starting with '/' is first joined with host
+// (when host is given); the resulting string is written to modifiedUrl in any
+// case. The URL must be shorter than URL_MAX - 8, parse as a valid absolute
+// http/https URL, and its host:port part must pass CheckHost().
+// Returns non-zero when valid, 0 otherwise.
+int TRobotsTxtRulesHandlerBase::CheckSitemapUrl(const char* url, const char* host, TString& modifiedUrl) {
+    const bool isHostRelative = host != nullptr && strlen(url) > 0 && url[0] == '/';
+    if (isHostRelative) {
+        modifiedUrl = TString(host) + url;
+    } else {
+        modifiedUrl = url;
+    }
+
+    const char* fullUrl = modifiedUrl.data();
+    if (strlen(fullUrl) >= URL_MAX - 8) {
+        return 0;
+    }
+
+    THttpURL parsed;
+    if (parsed.Parse(fullUrl, THttpURL::FeaturesRobot)) {
+        return 0;
+    }
+    if (!parsed.IsValidAbs()) {
+        return 0;
+    }
+
+    const bool isHttp = parsed.GetScheme() == THttpURL::SchemeHTTP;
+    if (!isHttp && parsed.GetScheme() != THttpURL::SchemeHTTPS) {
+        return 0;
+    }
+
+    return CheckHost(parsed.PrintS(THttpURL::FlagHostPort).data());
+}
+
+// Validates a Clean-param value and normalises it in place.
+// The value is a space-separated pair: the clean-params (joined by '&') and an
+// optional path prefix. On success the params part and one separator character
+// are kept, and the prefix is rewritten as a pattern: '*' becomes ".*", '.'
+// becomes "\.", and ".*" is appended.
+// Returns 1 on success, 0 when the value is too long or holds a forbidden
+// character. Note that a separator space appended to a value without one stays
+// in place even when 0 is returned.
+int TRobotsTxtRulesHandlerBase::CheckAndNormCleanParam(TString& value) {
+    // Guarantee that the params part is followed by at least one space.
+    if (value.find(' ') == TString::npos) {
+        value.push_back(' ');
+    }
+
+    const char* s = value.data();
+    // The length cap keeps the expanded prefix (at most two output bytes per input
+    // byte, plus the ".*" tail and the NUL) inside the URL_MAX-sized buffer below.
+    if (!s || !*s || strlen(s) > URL_MAX / 2 - 9)
+        return 0;
+
+    // <cctype> classifiers are undefined for negative arguments, hence the
+    // unsigned char conversions throughout this function.
+    const char* p = s;
+    while (*p && !isspace(static_cast<unsigned char>(*p)))
+        ++p;
+    for (; s != p; ++s) {
+        // allowed only following not alpha-numerical symbols
+        if (!isalnum(static_cast<unsigned char>(*s)) && !strchr("+-=_&%[]{}():.", *s))
+            return 0;
+        // clean-params for prefix can be enumerated by & symbol, && not allowed syntax
+        if (*s == '&' && *(s + 1) == '&')
+            return 0;
+    }
+
+    // p rests on a whitespace character here (the value always contains a space),
+    // so p + 1 is the first byte after the single separator that is kept.
+    const char* pathPrefix = p + 1;
+    while (isspace(static_cast<unsigned char>(*p)))
+        ++p;
+
+    char r[URL_MAX];
+    char* pr = r;
+    for (; *p; ++p) {
+        if (!isalnum(static_cast<unsigned char>(*p)) && !strchr(".-/*_,;:%", *p))
+            return 0;
+        if (*p == '*')
+            *pr++ = '.';
+        if (*p == '.')
+            *pr++ = '\\';
+        *pr++ = *p;
+    }
+    *pr++ = '.';
+    *pr++ = '*';
+    *pr = 0;
+
+    TString params = value.substr(0, pathPrefix - value.data());
+    value = params + r;
+    return 1;
+}
+
+// Parses a Crawl-delay value of the form "<digits>[.<digits>]" into thousandths:
+// the integer part is followed by exactly three fractional digits, zero-padded
+// or truncated as needed (e.g. "1.5" gives 1500).
+// Returns 1 and stores the result in crawlDelay on success. Returns 0, leaving
+// crawlDelay untouched, when the integer part exceeds 1024 or the text contains
+// anything other than digits and a single '.' separator.
+int TRobotsTxtRulesHandlerBase::ParseCrawlDelay(const char* value, int& crawlDelay) {
+    static const int MAX_CRAWL_DELAY = 1 << 10;
+    int val = 0;
+    const char* p = value;
+    // isdigit() is undefined for negative arguments, so convert through unsigned char.
+    for (; isdigit(static_cast<unsigned char>(*p)); ++p) {
+        val = val * 10 + *p - '0';
+        if (val > MAX_CRAWL_DELAY)
+            return 0;
+    }
+    if (*p) {
+        if (*p++ != '.')
+            return 0;
+        if (strspn(p, "1234567890") != strlen(p))
+            return 0;
+    }
+
+    // p now points at the fractional digits (possibly none); measure them once
+    // instead of on every iteration.
+    const size_t fractionLen = strlen(p);
+    for (size_t i = 0; i < 3; ++i)
+        val = val * 10 + (i < fractionLen ? p[i] - '0' : 0);
+    crawlDelay = val;
+    return 1;
+}
+
+// Adds a rule for botId via AddRule(). Unsupported bots are skipped and count as
+// success. When AddRule() fails, ERROR_ROBOTS_HUGE is recorded at the parser's
+// current line, AfterParse(botId) is called, and false is returned.
+bool TRobotsTxtRulesHandlerBase::AddRuleWithErrorCheck(const ui32 botId, TStringBuf rule, char type, TRobotsTxtParser& parser) {
+    if (!IsBotIdSupported(botId) || AddRule(botId, rule, type)) {
+        return true;
+    }
+
+    AddError(ERROR_ROBOTS_HUGE, parser.GetLineNumber());
+    AfterParse(botId);
+    return false;
+}
+
+// Handles a Host directive. Records ERROR_HOST_MULTI when a host is already set
+// on rulesHandler and ERROR_HOST_FORMAT when the value fails CheckHost();
+// otherwise stores the value and adds an 'H' rule.
+// Returns 2 when adding the rule fails, 0 in every other case.
+int TRobotsTxtRulesHandlerBase::OnHost(const ui32 botId, TRobotsTxtParser& parser, const char* value, TRobotsTxtRulesHandlerBase*& rulesHandler) {
+    // Temporary hack for correct repacking robots.txt from new format to old
+    // Remove it, when robot-stable-2010-10-17 will be deployed in production
+    if (!IsBotIdSupported(botId)) {
+        return 0;
+    }
+    // end of hack
+
+    if (rulesHandler->HostDirective != "") {
+        rulesHandler->AddError(ERROR_HOST_MULTI, parser.GetLineNumber());
+        return 0;
+    }
+
+    if (!CheckHost(value)) {
+        rulesHandler->AddError(ERROR_HOST_FORMAT, parser.GetLineNumber());
+        return 0;
+    }
+
+    rulesHandler->SetHostDirective(value);
+    const bool ruleStored = rulesHandler->AddRuleWithErrorCheck(botId, value, 'H', parser);
+    return ruleStored ? 0 : 2;
+}
+
+// True when botId is present in LoadedBotIds.
+bool TRobotsTxtRulesHandlerBase::IsBotIdLoaded(const ui32 botId) const {
+ return LoadedBotIds.contains(botId);
+}
+
+// True when botId is in SupportedBotIds, or when it is id_anybot and the handler
+// was created with saveDataForAnyBot enabled.
+bool TRobotsTxtRulesHandlerBase::IsBotIdSupported(const ui32 botId) const {
+ return (SaveDataForAnyBot && botId == robotstxtcfg::id_anybot) || SupportedBotIds.contains(botId);
+}
+
+// Translates botId through OptimizedBotIdToStoredBotId; ids beyond the table
+// are returned unchanged.
+ui32 TRobotsTxtRulesHandlerBase::GetNotOptimizedBotId(const ui32 botId) const {
+    if (botId < OptimizedBotIdToStoredBotId.size()) {
+        return OptimizedBotIdToStoredBotId[botId];
+    }
+    return botId;
+}
+
+// Resolves botId to the id whose data should be consulted. The id is first
+// translated by GetNotOptimizedBotId() and validated (CheckBotIdValidity() throws
+// on failure). A loaded bot resolves to itself; otherwise the result is id_anybot
+// when useAny is set and an empty TMaybe when it is not.
+TMaybe<ui32> TRobotsTxtRulesHandlerBase::GetMappedBotId(ui32 botId, bool useAny) const {
+    botId = GetNotOptimizedBotId(botId);
+    CheckBotIdValidity(botId);
+
+    if (!IsBotIdLoaded(botId)) {
+        if (!useAny) {
+            return {};
+        }
+        return robotstxtcfg::id_anybot;
+    }
+    return botId;
+}
diff --git a/library/cpp/robots_txt/ya.make b/library/cpp/robots_txt/ya.make
new file mode 100644
index 0000000000..c12b57ea04
--- /dev/null
+++ b/library/cpp/robots_txt/ya.make
@@ -0,0 +1,18 @@
+# Library target for robots_txt: four sources plus the dependencies listed in
+# PEERDIR below.
+LIBRARY()
+
+SRCS(
+ prefix_tree.cpp
+ prefix_tree_rules_handler.cpp
+ robots_txt_parser.cpp
+ rules_handler.cpp
+)
+
+PEERDIR(
+ library/cpp/robots_txt/robotstxtcfg
+ library/cpp/case_insensitive_string
+ library/cpp/charset
+ library/cpp/string_utils/url
+ library/cpp/uri
+)
+
+END()
diff --git a/library/cpp/yconf/CMakeLists.darwin-x86_64.txt b/library/cpp/yconf/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..4e5bbf836d
--- /dev/null
+++ b/library/cpp/yconf/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# library-cpp-yconf: a single source file, conf.cpp; the link dependencies are PUBLIC.
+add_library(library-cpp-yconf)
+target_link_libraries(library-cpp-yconf PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-charset
+ library-cpp-logger
+)
+target_sources(library-cpp-yconf PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/yconf/conf.cpp
+)
diff --git a/library/cpp/yconf/CMakeLists.linux-aarch64.txt b/library/cpp/yconf/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..8ddf881133
--- /dev/null
+++ b/library/cpp/yconf/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# library-cpp-yconf: a single source file, conf.cpp; the link dependencies are PUBLIC.
+# This variant additionally links contrib-libs-linux-headers.
+add_library(library-cpp-yconf)
+target_link_libraries(library-cpp-yconf PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-charset
+ library-cpp-logger
+)
+target_sources(library-cpp-yconf PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/yconf/conf.cpp
+)
diff --git a/library/cpp/yconf/CMakeLists.linux-x86_64.txt b/library/cpp/yconf/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..8ddf881133
--- /dev/null
+++ b/library/cpp/yconf/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# library-cpp-yconf: a single source file, conf.cpp; the link dependencies are PUBLIC.
+# This variant additionally links contrib-libs-linux-headers.
+add_library(library-cpp-yconf)
+target_link_libraries(library-cpp-yconf PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-charset
+ library-cpp-logger
+)
+target_sources(library-cpp-yconf PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/yconf/conf.cpp
+)
diff --git a/library/cpp/yconf/CMakeLists.txt b/library/cpp/yconf/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/library/cpp/yconf/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Include the build description matching the target OS and CPU.
+# NOTE(review): there is no else() branch, so on any other platform (or when
+# HAVE_CUDA is set for the branches that test it) nothing is included here.
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/library/cpp/yconf/CMakeLists.windows-x86_64.txt b/library/cpp/yconf/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..4e5bbf836d
--- /dev/null
+++ b/library/cpp/yconf/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# library-cpp-yconf: a single source file, conf.cpp; the link dependencies are PUBLIC.
+add_library(library-cpp-yconf)
+target_link_libraries(library-cpp-yconf PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-charset
+ library-cpp-logger
+)
+target_sources(library-cpp-yconf PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/yconf/conf.cpp
+)
diff --git a/mapreduce/CMakeLists.txt b/mapreduce/CMakeLists.txt
new file mode 100644
index 0000000000..ef64c4e308
--- /dev/null
+++ b/mapreduce/CMakeLists.txt
@@ -0,0 +1,9 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# The only subdirectory built under mapreduce/.
+add_subdirectory(yt)
diff --git a/mapreduce/yt/CMakeLists.txt b/mapreduce/yt/CMakeLists.txt
new file mode 100644
index 0000000000..38e1c6410c
--- /dev/null
+++ b/mapreduce/yt/CMakeLists.txt
@@ -0,0 +1,9 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# The only subdirectory built under mapreduce/yt/.
+add_subdirectory(interface)
diff --git a/mapreduce/yt/interface/CMakeLists.txt b/mapreduce/yt/interface/CMakeLists.txt
new file mode 100644
index 0000000000..6d580ae9ad
--- /dev/null
+++ b/mapreduce/yt/interface/CMakeLists.txt
@@ -0,0 +1,9 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# The only subdirectory built under mapreduce/yt/interface/.
+add_subdirectory(protos)
diff --git a/mapreduce/yt/interface/protos/CMakeLists.darwin-x86_64.txt b/mapreduce/yt/interface/protos/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..e0f89c3a9a
--- /dev/null
+++ b/mapreduce/yt/interface/protos/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,57 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Locate the protoc and cpp_styleguide tools used for code generation below.
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+# NOTE(review): the next two calls repeat the two above with identical arguments;
+# this looks like generator output (one pair per .proto file) - confirm before deduplicating.
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+# yt-interface-protos: library generated from extension.proto and yamr.proto.
+add_library(yt-interface-protos)
+target_link_libraries(yt-interface-protos PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ yt_proto-yt-formats
+ contrib-libs-protobuf
+)
+target_proto_messages(yt-interface-protos PRIVATE
+ ${CMAKE_SOURCE_DIR}/mapreduce/yt/interface/protos/extension.proto
+ ${CMAKE_SOURCE_DIR}/mapreduce/yt/interface/protos/yamr.proto
+)
+# NOTE(review): several include directories below are listed more than once.
+target_proto_addincls(yt-interface-protos
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+# Both protoc outputs are written into the build tree.
+target_proto_outs(yt-interface-protos
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/mapreduce/yt/interface/protos/CMakeLists.linux-aarch64.txt b/mapreduce/yt/interface/protos/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..20741a6631
--- /dev/null
+++ b/mapreduce/yt/interface/protos/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,58 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Look up the in-tree protoc tool. get_built_tool_path is defined outside this file;
+# judging by its arguments it presumably fills TOOL_protoc_bin / TOOL_protoc_dependency.
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+# Same lookup for the cpp_styleguide protoc plugin.
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+# NOTE(review): the next two calls repeat the two above verbatim (same variables,
+# same paths) - looks like a generator artifact.
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+# Library target built from the proto messages listed below.
+add_library(yt-interface-protos)
+# PUBLIC: these dependencies are also propagated to everything that links this target.
+# The Linux variants additionally link contrib-libs-linux-headers.
+target_link_libraries(yt-interface-protos PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yt_proto-yt-formats
+ contrib-libs-protobuf
+)
+# The .proto inputs of this target.
+target_proto_messages(yt-interface-protos PRIVATE
+ ${CMAKE_SOURCE_DIR}/mapreduce/yt/interface/protos/extension.proto
+ ${CMAKE_SOURCE_DIR}/mapreduce/yt/interface/protos/yamr.proto
+)
+# Include directories passed to the proto helper.
+# NOTE(review): ${CMAKE_BINARY_DIR} and .../contrib/libs/protobuf/src each appear twice,
+# and ${CMAKE_SOURCE_DIR} is listed both with and without a trailing slash.
+target_proto_addincls(yt-interface-protos
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+# Output flags: both the plain C++ output and the cpp_styleguide output go to the build tree root.
+target_proto_outs(yt-interface-protos
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/mapreduce/yt/interface/protos/CMakeLists.linux-x86_64.txt b/mapreduce/yt/interface/protos/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..20741a6631
--- /dev/null
+++ b/mapreduce/yt/interface/protos/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,58 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Look up the in-tree protoc tool. get_built_tool_path is defined outside this file;
+# judging by its arguments it presumably fills TOOL_protoc_bin / TOOL_protoc_dependency.
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+# Same lookup for the cpp_styleguide protoc plugin.
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+# NOTE(review): the next two calls repeat the two above verbatim (same variables,
+# same paths) - looks like a generator artifact.
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+# Library target built from the proto messages listed below.
+add_library(yt-interface-protos)
+# PUBLIC: these dependencies are also propagated to everything that links this target.
+# The Linux variants additionally link contrib-libs-linux-headers.
+target_link_libraries(yt-interface-protos PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yt_proto-yt-formats
+ contrib-libs-protobuf
+)
+# The .proto inputs of this target.
+target_proto_messages(yt-interface-protos PRIVATE
+ ${CMAKE_SOURCE_DIR}/mapreduce/yt/interface/protos/extension.proto
+ ${CMAKE_SOURCE_DIR}/mapreduce/yt/interface/protos/yamr.proto
+)
+# Include directories passed to the proto helper.
+# NOTE(review): ${CMAKE_BINARY_DIR} and .../contrib/libs/protobuf/src each appear twice,
+# and ${CMAKE_SOURCE_DIR} is listed both with and without a trailing slash.
+target_proto_addincls(yt-interface-protos
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+# Output flags: both the plain C++ output and the cpp_styleguide output go to the build tree root.
+target_proto_outs(yt-interface-protos
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/mapreduce/yt/interface/protos/CMakeLists.txt b/mapreduce/yt/interface/protos/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/mapreduce/yt/interface/protos/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Include the build description that matches the target OS and CPU architecture.
+# The Linux and Windows branches additionally require HAVE_CUDA to be false;
+# the Darwin branch has no such condition.
+# NOTE(review): there is no else() branch - on any other platform nothing is included,
+# so no target is defined by this directory and no diagnostic is printed.
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/mapreduce/yt/interface/protos/CMakeLists.windows-x86_64.txt b/mapreduce/yt/interface/protos/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..e0f89c3a9a
--- /dev/null
+++ b/mapreduce/yt/interface/protos/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,57 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Look up the in-tree protoc tool. get_built_tool_path is defined outside this file;
+# judging by its arguments it presumably fills TOOL_protoc_bin / TOOL_protoc_dependency.
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+# Same lookup for the cpp_styleguide protoc plugin.
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+# NOTE(review): the next two calls repeat the two above verbatim (same variables,
+# same paths) - looks like a generator artifact.
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+# Library target built from the proto messages listed below.
+add_library(yt-interface-protos)
+# PUBLIC: these dependencies are also propagated to everything that links this target.
+target_link_libraries(yt-interface-protos PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ yt_proto-yt-formats
+ contrib-libs-protobuf
+)
+# The .proto inputs of this target.
+target_proto_messages(yt-interface-protos PRIVATE
+ ${CMAKE_SOURCE_DIR}/mapreduce/yt/interface/protos/extension.proto
+ ${CMAKE_SOURCE_DIR}/mapreduce/yt/interface/protos/yamr.proto
+)
+# Include directories passed to the proto helper.
+# NOTE(review): ${CMAKE_BINARY_DIR} and .../contrib/libs/protobuf/src each appear twice,
+# and ${CMAKE_SOURCE_DIR} is listed both with and without a trailing slash.
+target_proto_addincls(yt-interface-protos
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+# Output flags: both the plain C++ output and the cpp_styleguide output go to the build tree root.
+target_proto_outs(yt-interface-protos
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/ydb/library/yql/public/CMakeLists.txt b/ydb/library/yql/public/CMakeLists.txt
index 823731a6d8..812b435b79 100644
--- a/ydb/library/yql/public/CMakeLists.txt
+++ b/ydb/library/yql/public/CMakeLists.txt
@@ -9,5 +9,6 @@
add_subdirectory(decimal)
add_subdirectory(fastcheck)
add_subdirectory(issue)
+add_subdirectory(purecalc)
add_subdirectory(types)
add_subdirectory(udf)
diff --git a/ydb/library/yql/public/purecalc/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..a417180394
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,27 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Descend into the purecalc sub-packages; each one has its own CMakeLists.txt.
+add_subdirectory(common)
+add_subdirectory(examples)
+add_subdirectory(helpers)
+add_subdirectory(io_specs)
+add_subdirectory(ut)
+
+# Top-level purecalc library: one source file on top of public-purecalc-common.
+add_library(yql-public-purecalc)
+# The define is passed as a raw compile option and stays private to this target.
+target_compile_options(yql-public-purecalc PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+# PUBLIC: these dependencies are also propagated to everything that links this target.
+target_link_libraries(yql-public-purecalc PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ udf-service-exception_policy
+ public-purecalc-common
+)
+target_sources(yql-public-purecalc PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/purecalc.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..7d72508c1f
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,28 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Descend into the purecalc sub-packages; each one has its own CMakeLists.txt.
+add_subdirectory(common)
+add_subdirectory(examples)
+add_subdirectory(helpers)
+add_subdirectory(io_specs)
+add_subdirectory(ut)
+
+# Top-level purecalc library: one source file on top of public-purecalc-common.
+add_library(yql-public-purecalc)
+# The define is passed as a raw compile option and stays private to this target.
+target_compile_options(yql-public-purecalc PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+# PUBLIC: these dependencies are also propagated to everything that links this target.
+# The Linux variants additionally link contrib-libs-linux-headers.
+target_link_libraries(yql-public-purecalc PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ udf-service-exception_policy
+ public-purecalc-common
+)
+target_sources(yql-public-purecalc PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/purecalc.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..7d72508c1f
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,28 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Descend into the purecalc sub-packages; each one has its own CMakeLists.txt.
+add_subdirectory(common)
+add_subdirectory(examples)
+add_subdirectory(helpers)
+add_subdirectory(io_specs)
+add_subdirectory(ut)
+
+# Top-level purecalc library: one source file on top of public-purecalc-common.
+add_library(yql-public-purecalc)
+# The define is passed as a raw compile option and stays private to this target.
+target_compile_options(yql-public-purecalc PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+# PUBLIC: these dependencies are also propagated to everything that links this target.
+# The Linux variants additionally link contrib-libs-linux-headers.
+target_link_libraries(yql-public-purecalc PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ udf-service-exception_policy
+ public-purecalc-common
+)
+target_sources(yql-public-purecalc PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/purecalc.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/CMakeLists.txt b/ydb/library/yql/public/purecalc/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Include the build description that matches the target OS and CPU architecture.
+# The Linux and Windows branches additionally require HAVE_CUDA to be false;
+# the Darwin branch has no such condition.
+# NOTE(review): there is no else() branch - on any other platform nothing is included,
+# so no target is defined by this directory and no diagnostic is printed.
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/ydb/library/yql/public/purecalc/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..a417180394
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,27 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Descend into the purecalc sub-packages; each one has its own CMakeLists.txt.
+add_subdirectory(common)
+add_subdirectory(examples)
+add_subdirectory(helpers)
+add_subdirectory(io_specs)
+add_subdirectory(ut)
+
+# Top-level purecalc library: one source file on top of public-purecalc-common.
+add_library(yql-public-purecalc)
+# The define is passed as a raw compile option and stays private to this target.
+target_compile_options(yql-public-purecalc PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+# PUBLIC: these dependencies are also propagated to everything that links this target.
+target_link_libraries(yql-public-purecalc PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ udf-service-exception_policy
+ public-purecalc-common
+)
+target_sources(yql-public-purecalc PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/purecalc.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/common/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/common/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..301054c4de
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,65 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Look up the in-tree enum_parser tool. get_built_tool_path is defined outside this file;
+# judging by its arguments it presumably fills TOOL_enum_parser_bin / TOOL_enum_parser_dependency.
+get_built_tool_path(
+ TOOL_enum_parser_bin
+ TOOL_enum_parser_dependency
+ tools/enum_parser/enum_parser
+ enum_parser
+)
+
+# Library holding the purecalc sources under purecalc/common (listed in target_sources below).
+add_library(public-purecalc-common)
+# The define is passed as a raw compile option and stays private to this target.
+target_compile_options(public-purecalc-common PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+# PUBLIC: these dependencies are also propagated to everything that links this target.
+target_link_libraries(public-purecalc-common PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ yql-sql-pg
+ library-yql-ast
+ yql-core-services
+ core-services-mounts
+ yql-core-user_data
+ minikql-comp_nodes-llvm
+ yql-utils-backtrace
+ yql-utils-log
+ library-yql-core
+ yql-core-type_ann
+ yql-parser-pg_wrapper
+ providers-common-codec
+ providers-common-comp_nodes
+ providers-common-mkql
+ providers-common-provider
+ common-schema-expr
+ providers-common-udf_resolve
+ tools-enum_parser-enum_serialization_runtime
+)
+target_sources(public-purecalc-common PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/compile_mkql.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/fwd.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/inspect_input.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/interface.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/logger_init.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/names.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/processor_mode.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/program_factory.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/type_annotation.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/type_from_schema.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/worker.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/worker_factory.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/wrappers.cpp
+)
+# Enum serialization is requested for interface.h; the same header is passed again,
+# as a repository-relative path, under INCLUDE_HEADERS.
+# NOTE(review): the helper name is spelled "serilization" in every platform file here;
+# presumably it matches the helper's definition - check before "fixing" the spelling.
+generate_enum_serilization(public-purecalc-common
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/interface.h
+ INCLUDE_HEADERS
+ ydb/library/yql/public/purecalc/common/interface.h
+)
diff --git a/ydb/library/yql/public/purecalc/common/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/common/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..b6d800550b
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,66 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Look up the in-tree enum_parser tool. get_built_tool_path is defined outside this file;
+# judging by its arguments it presumably fills TOOL_enum_parser_bin / TOOL_enum_parser_dependency.
+get_built_tool_path(
+ TOOL_enum_parser_bin
+ TOOL_enum_parser_dependency
+ tools/enum_parser/enum_parser
+ enum_parser
+)
+
+# Library holding the purecalc sources under purecalc/common (listed in target_sources below).
+add_library(public-purecalc-common)
+# The define is passed as a raw compile option and stays private to this target.
+target_compile_options(public-purecalc-common PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+# PUBLIC: these dependencies are also propagated to everything that links this target.
+# The Linux variants additionally link contrib-libs-linux-headers.
+target_link_libraries(public-purecalc-common PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yql-sql-pg
+ library-yql-ast
+ yql-core-services
+ core-services-mounts
+ yql-core-user_data
+ minikql-comp_nodes-llvm
+ yql-utils-backtrace
+ yql-utils-log
+ library-yql-core
+ yql-core-type_ann
+ yql-parser-pg_wrapper
+ providers-common-codec
+ providers-common-comp_nodes
+ providers-common-mkql
+ providers-common-provider
+ common-schema-expr
+ providers-common-udf_resolve
+ tools-enum_parser-enum_serialization_runtime
+)
+target_sources(public-purecalc-common PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/compile_mkql.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/fwd.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/inspect_input.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/interface.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/logger_init.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/names.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/processor_mode.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/program_factory.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/type_annotation.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/type_from_schema.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/worker.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/worker_factory.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/wrappers.cpp
+)
+# Enum serialization is requested for interface.h; the same header is passed again,
+# as a repository-relative path, under INCLUDE_HEADERS.
+# NOTE(review): the helper name is spelled "serilization" in every platform file here;
+# presumably it matches the helper's definition - check before "fixing" the spelling.
+generate_enum_serilization(public-purecalc-common
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/interface.h
+ INCLUDE_HEADERS
+ ydb/library/yql/public/purecalc/common/interface.h
+)
diff --git a/ydb/library/yql/public/purecalc/common/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/common/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..b6d800550b
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,66 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Look up the in-tree enum_parser tool. get_built_tool_path is defined outside this file;
+# judging by its arguments it presumably fills TOOL_enum_parser_bin / TOOL_enum_parser_dependency.
+get_built_tool_path(
+ TOOL_enum_parser_bin
+ TOOL_enum_parser_dependency
+ tools/enum_parser/enum_parser
+ enum_parser
+)
+
+# Library holding the purecalc sources under purecalc/common (listed in target_sources below).
+add_library(public-purecalc-common)
+# The define is passed as a raw compile option and stays private to this target.
+target_compile_options(public-purecalc-common PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+# PUBLIC: these dependencies are also propagated to everything that links this target.
+# The Linux variants additionally link contrib-libs-linux-headers.
+target_link_libraries(public-purecalc-common PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yql-sql-pg
+ library-yql-ast
+ yql-core-services
+ core-services-mounts
+ yql-core-user_data
+ minikql-comp_nodes-llvm
+ yql-utils-backtrace
+ yql-utils-log
+ library-yql-core
+ yql-core-type_ann
+ yql-parser-pg_wrapper
+ providers-common-codec
+ providers-common-comp_nodes
+ providers-common-mkql
+ providers-common-provider
+ common-schema-expr
+ providers-common-udf_resolve
+ tools-enum_parser-enum_serialization_runtime
+)
+target_sources(public-purecalc-common PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/compile_mkql.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/fwd.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/inspect_input.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/interface.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/logger_init.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/names.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/processor_mode.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/program_factory.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/type_annotation.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/type_from_schema.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/worker.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/worker_factory.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/wrappers.cpp
+)
+# Enum serialization is requested for interface.h; the same header is passed again,
+# as a repository-relative path, under INCLUDE_HEADERS.
+# NOTE(review): the helper name is spelled "serilization" in every platform file here;
+# presumably it matches the helper's definition - check before "fixing" the spelling.
+generate_enum_serilization(public-purecalc-common
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/interface.h
+ INCLUDE_HEADERS
+ ydb/library/yql/public/purecalc/common/interface.h
+)
diff --git a/ydb/library/yql/public/purecalc/common/CMakeLists.txt b/ydb/library/yql/public/purecalc/common/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Include the build description that matches the target OS and CPU architecture.
+# The Linux and Windows branches additionally require HAVE_CUDA to be false;
+# the Darwin branch has no such condition.
+# NOTE(review): there is no else() branch - on any other platform nothing is included,
+# so no target is defined by this directory and no diagnostic is printed.
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/ydb/library/yql/public/purecalc/common/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/common/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..301054c4de
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,65 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Look up the in-tree enum_parser tool. get_built_tool_path is defined outside this file;
+# judging by its arguments it presumably fills TOOL_enum_parser_bin / TOOL_enum_parser_dependency.
+get_built_tool_path(
+ TOOL_enum_parser_bin
+ TOOL_enum_parser_dependency
+ tools/enum_parser/enum_parser
+ enum_parser
+)
+
+# Library holding the purecalc sources under purecalc/common (listed in target_sources below).
+add_library(public-purecalc-common)
+# The define is passed as a raw compile option and stays private to this target.
+target_compile_options(public-purecalc-common PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+# PUBLIC: these dependencies are also propagated to everything that links this target.
+target_link_libraries(public-purecalc-common PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ yql-sql-pg
+ library-yql-ast
+ yql-core-services
+ core-services-mounts
+ yql-core-user_data
+ minikql-comp_nodes-llvm
+ yql-utils-backtrace
+ yql-utils-log
+ library-yql-core
+ yql-core-type_ann
+ yql-parser-pg_wrapper
+ providers-common-codec
+ providers-common-comp_nodes
+ providers-common-mkql
+ providers-common-provider
+ common-schema-expr
+ providers-common-udf_resolve
+ tools-enum_parser-enum_serialization_runtime
+)
+target_sources(public-purecalc-common PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/compile_mkql.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/fwd.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/inspect_input.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/interface.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/logger_init.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/names.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/processor_mode.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/program_factory.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/type_annotation.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/type_from_schema.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/worker.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/worker_factory.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/wrappers.cpp
+)
+# Enum serialization is requested for interface.h; the same header is passed again,
+# as a repository-relative path, under INCLUDE_HEADERS.
+# NOTE(review): the helper name is spelled "serilization" in every platform file here;
+# presumably it matches the helper's definition - check before "fixing" the spelling.
+generate_enum_serilization(public-purecalc-common
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/interface.h
+ INCLUDE_HEADERS
+ ydb/library/yql/public/purecalc/common/interface.h
+)
diff --git a/ydb/library/yql/public/purecalc/common/compile_mkql.cpp b/ydb/library/yql/public/purecalc/common/compile_mkql.cpp
new file mode 100644
index 0000000000..7a9946890c
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/compile_mkql.cpp
@@ -0,0 +1,115 @@
+#include "compile_mkql.h"
+
+#include <ydb/library/yql/providers/common/mkql/yql_provider_mkql.h>
+#include <ydb/library/yql/providers/common/mkql/yql_type_mkql.h>
+#include <ydb/library/yql/core/yql_user_data_storage.h>
+#include <ydb/library/yql/public/purecalc/common/names.h>
+
+#include <util/stream/file.h>
+
+namespace NYql::NPureCalc {
+
+namespace {
+
+NCommon::IMkqlCallableCompiler::TCompiler MakeSelfCallableCompiler() {
+    // Produces the compiler for the purecalc input callable: the node's single atom child is
+    // parsed as a ui32 input index and re-emitted as a MiniKQL callable carrying that index.
+    return [](const TExprNode& node, NCommon::TMkqlBuildContext& ctx) {
+        // Validate the node shape before reading the argument.
+        MKQL_ENSURE(node.ChildrenSize() == 1, "Self takes exactly 1 argument");
+        const auto* argument = node.Child(0);
+        MKQL_ENSURE(argument->IsAtom(), "Self argument must be atom");
+
+        // The atom text must parse as an unsigned 32-bit input index.
+        ui32 inputIndex = 0;
+        MKQL_ENSURE(TryFromString(argument->Content(), inputIndex), "Self argument must be UI32");
+
+        // Build a callable named after the node, typed by the node's annotation,
+        // with the input index as its only argument.
+        auto& builder = ctx.ProgramBuilder;
+        const auto resultType = NCommon::BuildType(node, *node.GetTypeAnn(), builder);
+        NKikimr::NMiniKQL::TCallableBuilder callable(builder.GetTypeEnvironment(), node.Content(), resultType);
+        callable.Add(builder.NewDataLiteral<ui32>(inputIndex));
+        return NKikimr::NMiniKQL::TRuntimeNode(callable.Build(), false);
+    };
+}
+
+NCommon::IMkqlCallableCompiler::TCompiler MakeFilePathCallableCompiler(const TUserDataTable& userData) {
+    // Produces the FilePath compiler: the named user data block is looked up and its Data
+    // is emitted as a String literal. The returned lambda holds a reference to userData,
+    // so the table must outlive the compiler.
+    return [&userData](const TExprNode& node, NCommon::TMkqlBuildContext& ctx) {
+        const TString name(node.Child(0)->Content());
+
+        // Try the plain name first, then retry with the default file prefix prepended.
+        auto block = TUserDataStorage::FindUserDataBlock(userData, TUserDataKey::File(name));
+        if (!block) {
+            block = TUserDataStorage::FindUserDataBlock(
+                userData, TUserDataKey::File(GetDefaultFilePrefix() + name));
+        }
+        MKQL_ENSURE(block, "file not found: " << name);
+
+        // Any block type other than PATH is rejected.
+        MKQL_ENSURE(block->Type == EUserDataType::PATH,
+            "FilePath not supported for non-filesystem user data, name: "
+            << name << ", block type: " << block->Type);
+
+        auto& builder = ctx.ProgramBuilder;
+        return builder.NewDataLiteral<NKikimr::NUdf::EDataSlot::String>(block->Data);
+    };
+}
+
+NCommon::IMkqlCallableCompiler::TCompiler MakeFileContentCallableCompiler(const TUserDataTable& userData) {
+    // Produces the FileContent compiler: the named user data block is looked up and its
+    // content is emitted as a String literal. The returned lambda holds a reference to
+    // userData, so the table must outlive the compiler.
+    return [&userData](const TExprNode& node, NCommon::TMkqlBuildContext& ctx) {
+        const TString name(node.Child(0)->Content());
+
+        // Try the plain name first, then retry with the default file prefix prepended.
+        auto block = TUserDataStorage::FindUserDataBlock(userData, TUserDataKey::File(name));
+        if (!block) {
+            block = TUserDataStorage::FindUserDataBlock(
+                userData, TUserDataKey::File(GetDefaultFilePrefix() + name));
+        }
+        MKQL_ENSURE(block, "file not found: " << name);
+
+        auto& builder = ctx.ProgramBuilder;
+
+        // PATH blocks: Data is a file path, so the file is read in full at compile time.
+        if (block->Type == EUserDataType::PATH) {
+            auto fileData = TFileInput(block->Data).ReadAll();
+            return builder.NewDataLiteral<NKikimr::NUdf::EDataSlot::String>(fileData);
+        }
+
+        // Inline blocks: Data already is the content.
+        if (block->Type == EUserDataType::RAW_INLINE_DATA) {
+            return builder.NewDataLiteral<NKikimr::NUdf::EDataSlot::String>(block->Data);
+        }
+
+        // TODO support EUserDataType::URL
+        MKQL_ENSURE(false, "user data blocks of type URL are not supported by FileContent: " << name);
+        Y_UNREACHABLE();
+    };
+}
+
+NCommon::IMkqlCallableCompiler::TCompiler MakeFolderPathCallableCompiler(const TUserDataTable& userData) {
+    // Produces the FolderPath compiler: every user data entry whose alias starts with the
+    // folder name contributes a candidate directory (its Data with the alias tail cut off);
+    // all candidates must agree, and the common value is emitted as a String literal.
+    // The returned lambda holds a reference to userData, so the table must outlive the compiler.
+    return [&](const TExprNode& node, NCommon::TMkqlBuildContext& ctx) {
+        const TString name(node.Child(0)->Content());
+        auto folderName = TUserDataStorage::MakeFolderName(name);
+        TMaybe<TString> folderPath;
+        for (const auto& x : userData) {
+            if (!x.first.Alias().StartsWith(folderName)) {
+                continue;
+            }
+
+            // The error names FolderPath, the callable actually being compiled here.
+            MKQL_ENSURE(x.second.Type == EUserDataType::PATH,
+                "FolderPath not supported for non-file data block, name: "
+                << x.first.Alias() << ", block type: " << x.second.Type);
+
+            // Drop from the end of Data as many characters as the alias has after the folder name.
+            auto pathPrefixLength = x.second.Data.size() - (x.first.Alias().size() - folderName.size());
+            auto newFolderPath = x.second.Data.substr(0, pathPrefixLength);
+            if (!folderPath) {
+                folderPath = newFolderPath;
+            } else {
+                MKQL_ENSURE(*folderPath == newFolderPath,
+                    "file " << x.second.Data << " is out of directory " << *folderPath);
+            }
+        }
+
+        // No entry matched the folder name: report it explicitly instead of
+        // dereferencing an empty TMaybe below.
+        MKQL_ENSURE(folderPath, "folder not found: " << name);
+        return ctx.ProgramBuilder.NewDataLiteral<NKikimr::NUdf::EDataSlot::String>(*folderPath);
+    };
+}
+
+}
+
+NKikimr::NMiniKQL::TRuntimeNode CompileMkql(const TExprNode::TPtr& exprRoot, TExprContext& exprCtx,
+    const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry, const NKikimr::NMiniKQL::TTypeEnvironment& env, const TUserDataTable& userData)
+{
+    // Callable compilers: register the purecalc input callable and override the three
+    // file-related callables with versions that resolve against userData.
+    NCommon::TMkqlCommonCallableCompiler callables;
+    callables.AddCallable(PurecalcInputCallableName, MakeSelfCallableCompiler());
+    callables.OverrideCallable("FileContent", MakeFileContentCallableCompiler(userData));
+    callables.OverrideCallable("FilePath", MakeFilePathCallableCompiler(userData));
+    callables.OverrideCallable("FolderPath", MakeFolderPathCallableCompiler(userData));
+
+    // Build context: program builder over the given environment and function registry.
+    NKikimr::NMiniKQL::TProgramBuilder programBuilder(env, funcRegistry);
+    NCommon::TMkqlBuildContext context(callables, programBuilder, exprCtx);
+
+    // Compile starting from the root expression node.
+    return NCommon::MkqlBuildExpr(*exprRoot, context);
+}
+
+} // NYql::NPureCalc
diff --git a/ydb/library/yql/public/purecalc/common/compile_mkql.h b/ydb/library/yql/public/purecalc/common/compile_mkql.h
new file mode 100644
index 0000000000..566459d396
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/compile_mkql.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <ydb/library/yql/public/purecalc/common/interface.h>
+#include <ydb/library/yql/minikql/mkql_node.h>
+#include <ydb/library/yql/ast/yql_expr.h>
+#include <ydb/library/yql/core/yql_user_data.h>
+
+namespace NYql::NPureCalc {
+    /**
+     * Compile expr to mkql: builds a MiniKQL runtime node from the expression tree rooted at exprRoot.
+     *
+     * @param exprRoot root of the expression tree to compile.
+     * @param exprCtx expression context the tree belongs to.
+     * @param funcRegistry function registry passed to the MiniKQL program builder.
+     * @param env type environment passed to the MiniKQL program builder.
+     * @param userData user data table made available to the compiled program.
+     * @return the root runtime node of the built program.
+     */
+    NKikimr::NMiniKQL::TRuntimeNode CompileMkql(
+        const TExprNode::TPtr& exprRoot,
+        TExprContext& exprCtx,
+        const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry,
+        const NKikimr::NMiniKQL::TTypeEnvironment& env,
+        const TUserDataTable& userData);
+}
diff --git a/ydb/library/yql/public/purecalc/common/fwd.cpp b/ydb/library/yql/public/purecalc/common/fwd.cpp
new file mode 100644
index 0000000000..4214b6df83
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/fwd.cpp
@@ -0,0 +1 @@
+#include "fwd.h"
diff --git a/ydb/library/yql/public/purecalc/common/fwd.h b/ydb/library/yql/public/purecalc/common/fwd.h
new file mode 100644
index 0000000000..22df90a6b2
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/fwd.h
@@ -0,0 +1,56 @@
+#pragma once
+
+#include <util/generic/fwd.h>
+#include <memory>
+
+namespace NYql::NPureCalc {
+    // Errors.
+    class TCompileError;
+
+    // Generic data flow interfaces.
+    template <typename>
+    class IStream;
+    template <typename>
+    class IConsumer;
+
+    // Input / output specification bases.
+    class TInputSpecBase;
+    class TOutputSpecBase;
+
+    // Factories.
+    class IProgramFactory;
+    class IWorkerFactory;
+    class IPullStreamWorkerFactory;
+    class IPullListWorkerFactory;
+    class IPushStreamWorkerFactory;
+
+    // Workers.
+    class IWorker;
+    class IPullStreamWorker;
+    class IPullListWorker;
+    class IPushStreamWorker;
+
+    // Programs.
+    class IProgram;
+    template <typename, typename, typename>
+    class TProgramCommon;
+    template <typename, typename>
+    class TPullStreamProgram;
+    template <typename, typename>
+    class TPullListProgram;
+    template <typename, typename>
+    class TPushStreamProgram;
+
+    // Smart pointer aliases for the factories declared above.
+    using IProgramFactoryPtr = TIntrusivePtr<IProgramFactory>;
+    using IWorkerFactoryPtr = std::shared_ptr<IWorkerFactory>;
+    using IPullStreamWorkerFactoryPtr = std::shared_ptr<IPullStreamWorkerFactory>;
+    using IPullListWorkerFactoryPtr = std::shared_ptr<IPullListWorkerFactory>;
+    using IPushStreamWorkerFactoryPtr = std::shared_ptr<IPushStreamWorkerFactory>;
+}
diff --git a/ydb/library/yql/public/purecalc/common/inspect_input.cpp b/ydb/library/yql/public/purecalc/common/inspect_input.cpp
new file mode 100644
index 0000000000..c8fbb6cd58
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/inspect_input.cpp
@@ -0,0 +1,33 @@
+#include "inspect_input.h"
+
+#include <ydb/library/yql/core/yql_expr_type_annotation.h>
+
+namespace NYql::NPureCalc {
+    // Reads the input index stored in the single atom child of `node`.
+    //
+    // Returns true and writes the index to `result` when the node has exactly one atom argument
+    // holding an unsigned number below `inputsCount`. Returns false otherwise; unparsable and
+    // out-of-range indexes are additionally reported to `ctx` as issues.
+    bool TryFetchInputIndexFromSelf(const TExprNode& node, TExprContext& ctx, ui32 inputsCount, ui32& result) {
+        TIssueScopeGuard issueScope(ctx.IssueManager, [&]() {
+            return MakeIntrusive<TIssue>(ctx.GetPosition(node.Pos()), TStringBuilder() << "At function: " << node.Content());
+        });
+
+        // The atom check is only reached once the argument count is known to be exactly one.
+        if (!EnsureArgsCount(node, 1, ctx) || !EnsureAtom(*node.Child(0), ctx)) {
+            return false;
+        }
+
+        const auto& indexAtom = *node.Child(0);
+
+        if (!TryFromString(indexAtom.Content(), result)) {
+            auto message = TStringBuilder() << "Index " << TString{indexAtom.Content()}.Quote() << " isn't UI32";
+            ctx.AddError(TIssue(ctx.GetPosition(indexAtom.Pos()), std::move(message)));
+            return false;
+        }
+
+        if (result >= inputsCount) {
+            auto message = TStringBuilder() << "Invalid input index: " << result << " is out of range [0;" << inputsCount << ")";
+            ctx.AddError(TIssue(ctx.GetPosition(indexAtom.Pos()), std::move(message)));
+            return false;
+        }
+
+        return true;
+    }
+}
diff --git a/ydb/library/yql/public/purecalc/common/inspect_input.h b/ydb/library/yql/public/purecalc/common/inspect_input.h
new file mode 100644
index 0000000000..fbc4413227
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/inspect_input.h
@@ -0,0 +1,7 @@
+#pragma once
+
+#include <ydb/library/yql/ast/yql_expr.h>
+
+namespace NYql::NPureCalc {
+    /**
+     * Try to read an input index from the only argument of the given node.
+     *
+     * @param node node whose single atom argument holds the index.
+     * @param ctx expression context used to report issues.
+     * @param inputsCount number of inputs; the index must be strictly less than it.
+     * @param result receives the parsed index.
+     * @return true if a valid index was stored in result, false otherwise.
+     */
+    bool TryFetchInputIndexFromSelf(const TExprNode& node, TExprContext& ctx, ui32 inputsCount, ui32& result);
+}
diff --git a/ydb/library/yql/public/purecalc/common/interface.cpp b/ydb/library/yql/public/purecalc/common/interface.cpp
new file mode 100644
index 0000000000..b22c65e482
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/interface.cpp
@@ -0,0 +1,116 @@
+#include "interface.h"
+
+#include <ydb/library/yql/providers/common/codec/yql_codec_type_flags.h>
+#include <ydb/library/yql/public/purecalc/common/logger_init.h>
+#include <ydb/library/yql/public/purecalc/common/program_factory.h>
+
+using namespace NYql;
+using namespace NYql::NPureCalc;
+
+// Defaults: TLOG_ERR priority, log messages written to Clog.
+TLoggingOptions::TLoggingOptions()
+    : LogLevel_(ELogPriority::TLOG_ERR)
+    , LogDestination(&Clog)
+{}
+
+// Stores the new log level and returns *this so calls can be chained.
+TLoggingOptions& TLoggingOptions::SetLogLevel(ELogPriority logLevel) {
+    this->LogLevel_ = logLevel;
+    return *this;
+}
+
+// Stores the (non-owned) output stream pointer and returns *this so calls can be chained.
+TLoggingOptions& TLoggingOptions::SetLogDestination(IOutputStream* logDestination) {
+    this->LogDestination = logDestination;
+    return *this;
+}
+
+// Defaults: no UDF directory, no user data, LLVM "OFF", no counters provider,
+// zero native YT type flags, system columns disabled, worker pool enabled.
+// DeterministicTimeProviderSeed is left default-constructed.
+TProgramFactoryOptions::TProgramFactoryOptions()
+    : UdfsDir_()
+    , UserData_()
+    , LLVMSettings("OFF")
+    , CountersProvider(nullptr)
+    , NativeYtTypeFlags(0)
+    , UseSystemColumns(false)
+    , UseWorkerPool(true)
+{}
+
+// Stores the UDF directory path and returns *this so calls can be chained.
+TProgramFactoryOptions& TProgramFactoryOptions::SetUDFsDir(TStringBuf dir) {
+    this->UdfsDir_ = dir;
+    return *this;
+}
+
+namespace {
+    // Appends one entry of the given type to the user data list and fills in its fields.
+    void AppendUserData(
+        TVector<NUserData::TUserData>& userData,
+        NUserData::EType type,
+        NUserData::EDisposition disposition,
+        TStringBuf name,
+        TStringBuf content)
+    {
+        auto& entry = userData.emplace_back();
+
+        entry.Type_ = type;
+        entry.Disposition_ = disposition;
+        entry.Name_ = name;
+        entry.Content_ = content;
+    }
+}
+
+// Registers a LIBRARY resource in the user data list; returns *this so calls can be chained.
+TProgramFactoryOptions& TProgramFactoryOptions::AddLibrary(NUserData::EDisposition disposition, TStringBuf name, TStringBuf content) {
+    AppendUserData(UserData_, NUserData::EType::LIBRARY, disposition, name, content);
+    return *this;
+}
+
+// Registers a FILE resource in the user data list; returns *this so calls can be chained.
+TProgramFactoryOptions& TProgramFactoryOptions::AddFile(NUserData::EDisposition disposition, TStringBuf name, TStringBuf content) {
+    AppendUserData(UserData_, NUserData::EType::FILE, disposition, name, content);
+    return *this;
+}
+
+// Registers a UDF resource in the user data list; returns *this so calls can be chained.
+TProgramFactoryOptions& TProgramFactoryOptions::AddUDF(NUserData::EDisposition disposition, TStringBuf name, TStringBuf content) {
+    AppendUserData(UserData_, NUserData::EType::UDF, disposition, name, content);
+    return *this;
+}
+
+// Stores the LLVM settings string and returns *this so calls can be chained.
+TProgramFactoryOptions& TProgramFactoryOptions::SetLLVMSettings(TStringBuf llvm_settings) {
+    this->LLVMSettings = llvm_settings;
+    return *this;
+}
+
+// Stores the (non-owned) counters provider pointer and returns *this so calls can be chained.
+TProgramFactoryOptions& TProgramFactoryOptions::SetCountersProvider(NKikimr::NUdf::ICountersProvider* countersProvider) {
+    this->CountersProvider = countersProvider;
+    return *this;
+}
+
+// Maps the boolean switch onto the native YT type flags: NTCF_PRODUCTION when enabled,
+// NTCF_NONE when disabled. Returns *this so calls can be chained.
+TProgramFactoryOptions& TProgramFactoryOptions::SetUseNativeYtTypes(bool useNativeTypes) {
+    if (useNativeTypes) {
+        NativeYtTypeFlags = NTCF_PRODUCTION;
+    } else {
+        NativeYtTypeFlags = NTCF_NONE;
+    }
+    return *this;
+}
+
+// Stores the native YT type flags verbatim and returns *this so calls can be chained.
+TProgramFactoryOptions& TProgramFactoryOptions::SetNativeYtTypeFlags(ui64 nativeTypeFlags) {
+    this->NativeYtTypeFlags = nativeTypeFlags;
+    return *this;
+}
+
+// Stores the optional seed (an empty TMaybe is stored as is) and returns *this so calls can be chained.
+TProgramFactoryOptions& TProgramFactoryOptions::SetDeterministicTimeProviderSeed(TMaybe<ui64> seed) {
+    this->DeterministicTimeProviderSeed = seed;
+    return *this;
+}
+
+// Stores the system columns flag and returns *this so calls can be chained.
+TProgramFactoryOptions& TProgramFactoryOptions::SetUseSystemColumns(bool useSystemColumns) {
+    this->UseSystemColumns = useSystemColumns;
+    return *this;
+}
+
+// Stores the worker pool flag and returns *this so calls can be chained.
+TProgramFactoryOptions& TProgramFactoryOptions::SetUseWorkerPool(bool useWorkerPool) {
+    this->UseWorkerPool = useWorkerPool;
+    return *this;
+}
+
+// Public entry point for logging setup; forwards the options to InitLogging.
+void NYql::NPureCalc::ConfigureLogging(const TLoggingOptions& options)
+{
+    InitLogging(options);
+}
+
+// Creates a TProgramFactory configured with the given options and hands it out as an intrusive pointer.
+IProgramFactoryPtr NYql::NPureCalc::MakeProgramFactory(const TProgramFactoryOptions& options) {
+    IProgramFactoryPtr factory(new TProgramFactory(options));
+    return factory;
+}
diff --git a/ydb/library/yql/public/purecalc/common/interface.h b/ydb/library/yql/public/purecalc/common/interface.h
new file mode 100644
index 0000000000..49bfd8c22a
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/interface.h
@@ -0,0 +1,1137 @@
+#pragma once
+
+#include "fwd.h"
+#include "wrappers.h"
+
+#include <ydb/library/yql/core/user_data/yql_user_data.h>
+
+#include <ydb/library/yql/public/udf/udf_value.h>
+#include <ydb/library/yql/public/udf/udf_counter.h>
+#include <ydb/library/yql/public/udf/udf_registrator.h>
+
+#include <ydb/library/yql/public/issue/yql_issue.h>
+#include <library/cpp/yson/node/node.h>
+
+#include <library/cpp/logger/priority.h>
+
+#include <util/generic/ptr.h>
+#include <util/generic/maybe.h>
+#include <util/generic/hash_set.h>
+#include <util/generic/string.h>
+#include <util/stream/output.h>
+
+class ITimeProvider;
+
+namespace NKikimr {
+ namespace NMiniKQL {
+ class TScopedAlloc;
+ class IComputationGraph;
+ class IFunctionRegistry;
+ class TTypeEnvironment;
+ class TType;
+ class TStructType;
+ }
+}
+
+namespace NYql {
+ namespace NPureCalc {
+ /**
+  * Exception type describing a SQL or s-expression translation error.
+  */
+ class TCompileError: public yexception {
+ public:
+     // TODO: maybe accept an actual list of issues here?
+     // See https://a.yandex-team.ru/arc/review/439403/details#comment-778237
+     TCompileError(TString yql, TString issues)
+         : Yql_(std::move(yql))
+         , Issues_(std::move(issues))
+     {
+     }
+
+     /**
+      * The sql query which caused the error (if there is one available).
+      */
+     const TString& GetYql() const {
+         return Yql_;
+     }
+
+     /**
+      * Detailed description of all errors and warnings that happened during sql translation.
+      */
+     const TString& GetIssues() const {
+         return Issues_;
+     }
+
+ private:
+     TString Yql_;
+     TString Issues_;
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ /**
+ * A generic input stream of objects.
+ */
+ template <typename T>
+ class IStream {
+ public:
+ virtual ~IStream() = default;
+
+ public:
+ /**
+ * Pops and returns a next value in the stream. If the stream is finished, should return some sentinel object.
+ *
+ * Depending on return type, this function may not transfer object ownership to a user.
+ * Thus, the stream may manage the returned object itself.
+ * That is, the returned object's lifetime may be bound to the input stream lifetime; it may be destroyed
+ * upon calling Fetch() or upon destroying the stream, whichever happens first.
+ */
+ virtual T Fetch() = 0;
+ };
+
+ /**
+  * Wrap the given stream into a new stream which applies the functor to the elements of the original one.
+  *
+  * Ownership of the original stream is transferred to the returned wrapper.
+  */
+ template <typename TOld, typename TNew, typename TFunctor>
+ inline THolder<IStream<TNew>> MapStream(THolder<IStream<TOld>> stream, TFunctor functor) {
+     using TMapped = NPrivate::TMappingStream<TNew, TOld, TFunctor>;
+     return THolder<IStream<TNew>>(new TMapped(std::move(stream), std::move(functor)));
+ }
+
+ /**
+  * Convert stream of objects into a stream of potentially incompatible objects.
+  *
+  * The conversion applies static_cast to the output of the original stream. Use with caution!
+  * The single-parameter overload handles identical element types and returns the stream untouched.
+  */
+ /// @{
+ template <
+     typename TNew, typename TOld,
+     std::enable_if_t<!std::is_same<TNew, TOld>::value>* = nullptr>
+ inline THolder<IStream<TNew>> ConvertStreamUnsafe(THolder<IStream<TOld>> stream) {
+     auto castElement = [](TOld element) -> TNew {
+         return static_cast<TNew>(element);
+     };
+     return MapStream<TOld, TNew>(std::move(stream), std::move(castElement));
+ }
+ template <typename T>
+ inline THolder<IStream<T>> ConvertStreamUnsafe(THolder<IStream<T>> stream) {
+     return stream;
+ }
+ /// @}
+
+ /**
+  * Convert stream of objects into a stream of compatible objects.
+  *
+  * Only participates in overload resolution when TOld is convertible to TNew.
+  * When TNew and TOld are the same type, the stream is returned as is.
+  *
+  * Note: each conversion between distinct types adds one level of indirection so avoid them if possible.
+  */
+ template <typename TNew, typename TOld, std::enable_if_t<std::is_convertible<TOld, TNew>::value>* = nullptr>
+ inline THolder<IStream<TNew>> ConvertStream(THolder<IStream<TOld>> stream) {
+     if constexpr (std::is_same<TNew, TOld>::value) {
+         // ConvertStreamUnsafe<TNew, TOld> has no viable overload for identical types: the
+         // two-parameter overload is disabled and the identity overload takes one template argument.
+         return stream;
+     } else {
+         return ConvertStreamUnsafe<TNew, TOld>(std::move(stream));
+     }
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ /**
+ * A generic push consumer.
+ */
+ template <typename T>
+ class IConsumer {
+ public:
+ virtual ~IConsumer() = default;
+
+ public:
+ /**
+ * Feed an object to consumer.
+ *
+ * Depending on argument type, the consumer may not take ownership of the passed object;
+ * in that case it is the caller responsibility to manage the object lifetime after passing it to this method.
+ *
+ * The passed object can be destroyed after the consumer returns from this function; the consumer should
+ * not store pointer to the passed object or the passed object itself without taking all necessary precautions
+ * to ensure that the pointer or the object stays valid after returning.
+ */
+ virtual void OnObject(T) = 0;
+
+ /**
+ * Close the consumer and run finalization logic. Calling OnObject after calling this function is an error.
+ */
+ virtual void OnFinish() = 0;
+ };
+
+ /**
+  * Wrap the given consumer into a new consumer which applies the functor to incoming objects
+  * before they reach the original consumer.
+  *
+  * Ownership of the original consumer is transferred to the returned wrapper.
+  */
+ template <typename TOld, typename TNew, typename TFunctor>
+ inline THolder<IConsumer<TNew>> MapConsumer(THolder<IConsumer<TOld>> stream, TFunctor functor) {
+     using TMapped = NPrivate::TMappingConsumer<TNew, TOld, TFunctor>;
+     return THolder<IConsumer<TNew>>(new TMapped(std::move(stream), std::move(functor)));
+ }
+
+
+ /**
+  * Convert consumer of objects into a consumer of potentially incompatible objects.
+  *
+  * The conversion applies static_cast to the input value. Use with caution.
+  * The single-parameter overload handles identical object types and returns the consumer untouched.
+  */
+ /// @{
+ template <
+     typename TNew, typename TOld,
+     std::enable_if_t<!std::is_same<TNew, TOld>::value>* = nullptr>
+ inline THolder<IConsumer<TNew>> ConvertConsumerUnsafe(THolder<IConsumer<TOld>> consumer) {
+     auto castObject = [](TNew object) -> TOld {
+         return static_cast<TOld>(object);
+     };
+     return MapConsumer<TOld, TNew>(std::move(consumer), std::move(castObject));
+ }
+ template <typename T>
+ inline THolder<IConsumer<T>> ConvertConsumerUnsafe(THolder<IConsumer<T>> consumer) {
+     return consumer;
+ }
+ /// @}
+
+ /**
+  * Convert consumer of objects into a consumer of compatible objects.
+  *
+  * Only participates in overload resolution when TNew is convertible to TOld.
+  * When TNew and TOld are the same type, the consumer is returned as is.
+  *
+  * Note: each conversion between distinct types adds one level of indirection so avoid them if possible.
+  */
+ template <typename TNew, typename TOld, std::enable_if_t<std::is_convertible<TNew, TOld>::value>* = nullptr>
+ inline THolder<IConsumer<TNew>> ConvertConsumer(THolder<IConsumer<TOld>> consumer) {
+     if constexpr (std::is_same<TNew, TOld>::value) {
+         // ConvertConsumerUnsafe<TNew, TOld> has no viable overload for identical types: the
+         // two-parameter overload is disabled and the identity overload takes one template argument.
+         return consumer;
+     } else {
+         return ConvertConsumerUnsafe<TNew, TOld>(std::move(consumer));
+     }
+ }
+
+ /**
+  * Create a consumer which holds a non-owning pointer to the given consumer
+  * and passes all messages to the latter.
+  */
+ template <typename T, typename C>
+ THolder<NPrivate::TNonOwningConsumer<T, C>> MakeNonOwningConsumer(C consumer) {
+     using TWrapper = NPrivate::TNonOwningConsumer<T, C>;
+     return MakeHolder<TWrapper>(consumer);
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ /**
+ * Logging options.
+ */
+ struct TLoggingOptions final {
+ public:
+ /// Logging level for messages generated during compilation.
+ ELogPriority LogLevel_; // TODO: rename to LogLevel
+
+ /// Where to write log messages.
+ IOutputStream* LogDestination;
+
+ public:
+ TLoggingOptions();
+ /**
+ * Set a new logging level.
+ *
+ * @return reference to self, to allow method chaining.
+ */
+ TLoggingOptions& SetLogLevel(ELogPriority);
+
+ /**
+ * Set a new logging destination.
+ *
+ * @return reference to self, to allow method chaining.
+ */
+ TLoggingOptions& SetLogDestination(IOutputStream*);
+ };
+
+ /**
+ * General options for program factory.
+ */
+ struct TProgramFactoryOptions final {
+ public:
+ /// Path to a directory with compiled UDFs. Leave empty to disable loading external UDFs.
+ TString UdfsDir_; // TODO: rename to UDFDir
+
+ /// List of available external resources, e.g. files, UDFs, libraries.
+ TVector<NUserData::TUserData> UserData_; // TODO: rename to UserData
+
+ /// LLVM settings. Assign "OFF" to disable LLVM, empty string for default settings.
+ TString LLVMSettings;
+
+ /// Provider for generic counters which can be used to export statistics from UDFs.
+ NKikimr::NUdf::ICountersProvider* CountersProvider;
+
+ /// YT Type V3 flags for Skiff/Yson serialization.
+ ui64 NativeYtTypeFlags;
+
+ /// Seed for deterministic time provider
+ TMaybe<ui64> DeterministicTimeProviderSeed;
+
+ /// Use special system columns to support tables naming (supports non empty ``TablePath()``/``TableName()``)
+ bool UseSystemColumns;
+
+ /// Reuse allocated workers
+ bool UseWorkerPool;
+
+ public:
+ TProgramFactoryOptions();
+
+ public:
+ /**
+ * Set a new path to a directory with UDFs.
+ *
+ * @return reference to self, to allow method chaining.
+ */
+ TProgramFactoryOptions& SetUDFsDir(TStringBuf);
+
+ /**
+ * Add a new library to the UserData list.
+ *
+ * @param disposition where the resource resides, e.g. on filesystem, in memory, etc.
+ * NB: URL disposition is not supported.
+ * @param name name of the resource.
+ * @param content depending on disposition, either path to the resource or its content.
+ * @return reference to self, to allow method chaining.
+ */
+ TProgramFactoryOptions& AddLibrary(NUserData::EDisposition disposition, TStringBuf name, TStringBuf content);
+
+ /**
+ * Add a new file to the UserData list.
+ *
+ * @param disposition where the resource resides, e.g. on filesystem, in memory, etc.
+ * NB: URL disposition is not supported.
+ * @param name name of the resource.
+ * @param content depending on disposition, either path to the resource or its content.
+ * @return reference to self, to allow method chaining.
+ */
+ TProgramFactoryOptions& AddFile(NUserData::EDisposition disposition, TStringBuf name, TStringBuf content);
+
+ /**
+ * Add a new UDF to the UserData list.
+ *
+ * @param disposition where the resource resides, e.g. on filesystem, in memory, etc.
+ * NB: URL disposition is not supported.
+ * @param name name of the resource.
+ * @param content depending on disposition, either path to the resource or its content.
+ * @return reference to self, to allow method chaining.
+ */
+ TProgramFactoryOptions& AddUDF(NUserData::EDisposition disposition, TStringBuf name, TStringBuf content);
+
+ /**
+ * Set new LLVM settings.
+ *
+ * @return reference to self, to allow method chaining.
+ */
+ TProgramFactoryOptions& SetLLVMSettings(TStringBuf llvm_settings);
+
+ /**
+ * Set new counters provider. Passed pointer should stay alive for as long as the processor factory
+ * stays alive.
+ *
+ * @return reference to self, to allow method chaining.
+ */
+ TProgramFactoryOptions& SetCountersProvider(NKikimr::NUdf::ICountersProvider* countersProvider);
+
+ /**
+ * Set new YT Type V3 mode. Deprecated method. Use SetNativeYtTypeFlags instead
+ *
+ * @return reference to self, to allow method chaining.
+ */
+ TProgramFactoryOptions& SetUseNativeYtTypes(bool useNativeTypes);
+
+ /**
+ * Set YT Type V3 flags.
+ *
+ * @return reference to self, to allow method chaining.
+ */
+ TProgramFactoryOptions& SetNativeYtTypeFlags(ui64 nativeTypeFlags);
+
+ /**
+ * Set seed for deterministic time provider.
+ *
+ * @return reference to self, to allow method chaining.
+ */
+ TProgramFactoryOptions& SetDeterministicTimeProviderSeed(TMaybe<ui64> seed);
+
+ /**
+ * Set new flag whether to allow using system columns or not.
+ *
+ * @return reference to self, to allow method chaining.
+ */
+ TProgramFactoryOptions& SetUseSystemColumns(bool useSystemColumns);
+
+ /**
+ * Set new flag whether to allow reusing workers or not.
+ *
+ * @return reference to self, to allow method chaining.
+ */
+ TProgramFactoryOptions& SetUseWorkerPool(bool useWorkerPool);
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ /**
+ * What exactly are we parsing: SQL, an s-expression, or mkql.
+ */
+ enum class ETranslationMode {
+ SQL /* "SQL" */,
+ SExpr /* "s-expression" */,
+ Mkql /* "mkql" */
+ };
+
+ /**
+  * A facility for compiling sql and s-expressions and making programs from them.
+  */
+ class IProgramFactory: public TThrRefBase {
+ protected:
+     // Mode-specific worker factory builders; the public Make*Program templates below delegate to them.
+     virtual IPullStreamWorkerFactoryPtr MakePullStreamWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) = 0;
+     virtual IPullListWorkerFactoryPtr MakePullListWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) = 0;
+     virtual IPushStreamWorkerFactoryPtr MakePushStreamWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) = 0;
+
+ public:
+     /**
+      * Add new udf module. It's not specified whether adding new modules will affect existing programs
+      * (theoretical answer is 'no').
+      */
+     virtual void AddUdfModule(const TStringBuf&, NKikimr::NUdf::TUniquePtr<NKikimr::NUdf::IUdfModule>&&) = 0;
+     // TODO: support setting udf modules via factory options.
+
+     /**
+      * Set new counters provider, override one that was specified via factory options. Note that existing
+      * programs will still reference the previous provider.
+      */
+     virtual void SetCountersProvider(NKikimr::NUdf::ICountersProvider*) = 0;
+     // TODO: support setting providers via factory options.
+
+     /**
+      * Make a pull stream program from the query. The worker factory is built from the specs first;
+      * the specs are then moved into the program.
+      */
+     template <typename TInputSpec, typename TOutputSpec>
+     THolder<TPullStreamProgram<TInputSpec, TOutputSpec>> MakePullStreamProgram(
+         TInputSpec inputSpec, TOutputSpec outputSpec, TString query, ETranslationMode mode = ETranslationMode::SQL, ui16 syntaxVersion = 1
+     ) {
+         using TProgram = TPullStreamProgram<TInputSpec, TOutputSpec>;
+         auto factory = MakePullStreamWorkerFactory(inputSpec, outputSpec, std::move(query), mode, syntaxVersion);
+         return MakeHolder<TProgram>(std::move(inputSpec), std::move(outputSpec), factory);
+     }
+
+     /**
+      * Make a pull list program from the query. The worker factory is built from the specs first;
+      * the specs are then moved into the program.
+      */
+     template <typename TInputSpec, typename TOutputSpec>
+     THolder<TPullListProgram<TInputSpec, TOutputSpec>> MakePullListProgram(
+         TInputSpec inputSpec, TOutputSpec outputSpec, TString query, ETranslationMode mode = ETranslationMode::SQL, ui16 syntaxVersion = 1
+     ) {
+         using TProgram = TPullListProgram<TInputSpec, TOutputSpec>;
+         auto factory = MakePullListWorkerFactory(inputSpec, outputSpec, std::move(query), mode, syntaxVersion);
+         return MakeHolder<TProgram>(std::move(inputSpec), std::move(outputSpec), factory);
+     }
+
+     /**
+      * Make a push stream program from the query. The worker factory is built from the specs first;
+      * the specs are then moved into the program.
+      */
+     template <typename TInputSpec, typename TOutputSpec>
+     THolder<TPushStreamProgram<TInputSpec, TOutputSpec>> MakePushStreamProgram(
+         TInputSpec inputSpec, TOutputSpec outputSpec, TString query, ETranslationMode mode = ETranslationMode::SQL, ui16 syntaxVersion = 1
+     ) {
+         using TProgram = TPushStreamProgram<TInputSpec, TOutputSpec>;
+         auto factory = MakePushStreamWorkerFactory(inputSpec, outputSpec, std::move(query), mode, syntaxVersion);
+         return MakeHolder<TProgram>(std::move(inputSpec), std::move(outputSpec), factory);
+     }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ /**
+ * A facility for creating workers. Despite being a part of a public API, worker factory is not used directly.
+ */
+ class IWorkerFactory: public std::enable_shared_from_this<IWorkerFactory> {
+ public:
+ virtual ~IWorkerFactory() = default;
+ /**
+ * Get input column names for specified input that are actually used in the query.
+ */
+ virtual const THashSet<TString>& GetUsedColumns(ui32) const = 0;
+ /**
+ * Overload for single-input programs.
+ */
+ virtual const THashSet<TString>& GetUsedColumns() const = 0;
+
+ /**
+ * Make input type schema for specified input as deduced by program optimizer. This schema is equivalent
+ * to one provided by input spec up to the order of the fields in structures.
+ */
+ virtual NYT::TNode MakeInputSchema(ui32) const = 0;
+ /**
+ * Overload for single-input programs.
+ */
+ virtual NYT::TNode MakeInputSchema() const = 0;
+
+ /**
+ * Make output type schema as deduced by program optimizer. If output spec provides its own schema, then
+ * this schema is equivalent to one provided by output spec up to the order of the fields in structures.
+ */
+ /// @{
+ /**
+ * Overload for single-table output programs (i.e. output type is struct).
+ */
+ virtual NYT::TNode MakeOutputSchema() const = 0;
+ /**
+ * Overload for multi-table output programs (i.e. output type is variant over tuple).
+ */
+ virtual NYT::TNode MakeOutputSchema(ui32) const = 0;
+ /**
+ * Overload for multi-table output programs (i.e. output type is variant over struct).
+ */
+ virtual NYT::TNode MakeOutputSchema(TStringBuf) const = 0;
+ /// @}
+
+ /**
+ * Make full output schema. For single-output programs returns struct type, for multi-output programs
+ * returns variant type.
+ *
+ * Warning: calling this function may result in extended memory usage for large number of output tables.
+ */
+ virtual NYT::TNode MakeFullOutputSchema() const = 0;
+
+ /**
+ * Get compilation issues
+ */
+ virtual TIssues GetIssues() const = 0;
+
+ /**
+ * Get precompiled mkql program
+ */
+ virtual TString GetCompiledProgram() = 0;
+
+ /**
+ * Return a worker to the factory for possible reuse
+ */
+ virtual void ReturnWorker(IWorker* worker) = 0;
+ };
+
+ /**
+  * Deleter policy for worker holders: calls Release() on the worker instead of deleting it.
+  */
+ class TReleaseWorker {
+ public:
+     template <class TWorker>
+     static void Destroy(TWorker* worker) noexcept {
+         worker->Release();
+     }
+ };
+
+ template <class T>
+ using TWorkerHolder = THolder<T, TReleaseWorker>;
+
+ /**
+ * Factory for generating pull stream workers.
+ */
+ class IPullStreamWorkerFactory: public IWorkerFactory {
+ public:
+ /**
+ * Create a new pull stream worker.
+ */
+ virtual TWorkerHolder<IPullStreamWorker> MakeWorker() = 0;
+ };
+
+ /**
+ * Factory for generating pull list workers.
+ */
+ class IPullListWorkerFactory: public IWorkerFactory {
+ public:
+ /**
+ * Create a new pull list worker.
+ */
+ virtual TWorkerHolder<IPullListWorker> MakeWorker() = 0;
+ };
+
+ /**
+ * Factory for generating push stream workers.
+ */
+ class IPushStreamWorkerFactory: public IWorkerFactory {
+ public:
+ /**
+ * Create a new push stream worker.
+ */
+ virtual TWorkerHolder<IPushStreamWorker> MakeWorker() = 0;
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ /**
+ * Worker is a central part of any program instance. It contains current computation state
+ * (called computation graph) and objects required to work with it, including an allocator for unboxed values.
+ *
+ * Usually, users do not interact with workers directly. They use program instance entry points such as streams
+ * and consumers instead. The only case when one would have to interact with workers is when implementing
+ * custom io-specification.
+ */
+ class IWorker {
+ protected:
+ friend class TReleaseWorker;
+ /**
+ * Cleanup the worker and return to a worker factory for reuse
+ */
+ virtual void Release() = 0;
+
+ public:
+ virtual ~IWorker() = default;
+
+ public:
+ /**
+ * Number of inputs for this program.
+ */
+ virtual ui32 GetInputsCount() const = 0;
+
+ /**
+ * MiniKQL input struct type of specified input for this program. Type is equivalent to the deduced input
+ * schema (see IWorker::MakeInputSchema())
+ *
+ * If ``original`` is set to ``true``, returns type without virtual system columns.
+ */
+ virtual const NKikimr::NMiniKQL::TStructType* GetInputType(ui32, bool original = false) const = 0;
+ /**
+ * Overload for single-input programs.
+ */
+ virtual const NKikimr::NMiniKQL::TStructType* GetInputType(bool original = false) const = 0;
+
+ /**
+ * MiniKQL output struct type for this program. The returned type is equivalent to the deduced output
+ * schema (see IWorker::MakeFullOutputSchema()).
+ */
+ virtual const NKikimr::NMiniKQL::TType* GetOutputType() const = 0;
+
+ /**
+ * Make input type schema for specified input as deduced by program optimizer. This schema is equivalent
+ * to one provided by input spec up to the order of the fields in structures.
+ */
+ virtual NYT::TNode MakeInputSchema(ui32) const = 0;
+ /**
+ * Overload for single-input programs.
+ */
+ virtual NYT::TNode MakeInputSchema() const = 0;
+
+ /**
+ * Make output type schema as deduced by program optimizer. If output spec provides its own schema, then
+ * this schema is equivalent to one provided by output spec up to the order of the fields in structures.
+ */
+ /// @{
+ /**
+ * Overload for single-table output programs (i.e. output type is struct).
+ */
+ virtual NYT::TNode MakeOutputSchema() const = 0;
+ /**
+ * Overload for multi-table output programs (i.e. output type is variant over tuple).
+ */
+ virtual NYT::TNode MakeOutputSchema(ui32) const = 0;
+ /**
+ * Overload for multi-table output programs (i.e. output type is variant over struct).
+ */
+ virtual NYT::TNode MakeOutputSchema(TStringBuf) const = 0;
+ /// @}
+
+ /**
+ * Generates full output schema. For single-output programs returns struct type, for multi-output programs
+ * returns variant type.
+ *
+ * Warning: calling this function may result in extended memory usage for large number of output tables.
+ */
+ virtual NYT::TNode MakeFullOutputSchema() const = 0;
+
+ /**
+ * Get scoped alloc used in this worker.
+ */
+ virtual NKikimr::NMiniKQL::TScopedAlloc& GetScopedAlloc() = 0;
+
+ /**
+ * Get computation graph.
+ */
+ virtual NKikimr::NMiniKQL::IComputationGraph& GetGraph() = 0;
+
+ /**
+ * Get function registry for this worker.
+ */
+ virtual const NKikimr::NMiniKQL::IFunctionRegistry& GetFunctionRegistry() const = 0;
+
+ /**
+ * Get type environment for this worker.
+ */
+ virtual NKikimr::NMiniKQL::TTypeEnvironment& GetTypeEnvironment() = 0;
+
+ /**
+ * Get llvm settings for this worker.
+ */
+ virtual const TString& GetLLVMSettings() const = 0;
+
+ /**
+ * Get YT Type V3 flags
+ */
+ virtual ui64 GetNativeYtTypeFlags() const = 0;
+
+ /**
+ * Get time provider
+ */
+ virtual ITimeProvider* GetTimeProvider() const = 0;
+ };
+
+ /**
+ * Worker which operates in pull stream mode.
+ */
+ class IPullStreamWorker: public IWorker {
+ public:
+ /**
+ * Set input computation graph node for specified input. The passed unboxed value should be a stream of
+ * structs. It should be created via the allocator associated with this very worker.
+ * This function can only be called once for each input.
+ */
+ virtual void SetInput(NKikimr::NUdf::TUnboxedValue&&, ui32) = 0;
+
+ /**
+ * Get the output computation graph node. The returned node will be a stream of structs or variants.
+ * This function cannot be called before setting an input value.
+ */
+ virtual NKikimr::NUdf::TUnboxedValue& GetOutput() = 0;
+ };
+
+ /**
+ * Worker which operates in pull list mode.
+ */
+ class IPullListWorker: public IWorker {
+ public:
+ /**
+ * Set input computation graph node for specified input. The passed unboxed value should be a list of
+ * structs. It should be created via the allocator associated with this very worker.
+ * This function can only be called once for each index.
+ */
+ virtual void SetInput(NKikimr::NUdf::TUnboxedValue&&, ui32) = 0;
+
+ /**
+ * Get the output computation graph node. The returned node will be a list of structs or variants.
+ * This function cannot be called before setting an input value.
+ */
+ virtual NKikimr::NUdf::TUnboxedValue& GetOutput() = 0;
+
+ /**
+ * Get iterator over the output list.
+ */
+ virtual NKikimr::NUdf::TUnboxedValue& GetOutputIterator() = 0;
+
+ /**
+ * Reset iterator to the beginning of the output list. After calling this function, GetOutputIterator()
+ * will return a fresh iterator; all previously returned iterators will become invalid.
+ */
+ virtual void ResetOutputIterator() = 0;
+ };
+
+ /**
+ * Worker which operates in push stream mode.
+ */
+ class IPushStreamWorker: public IWorker {
+ public:
+ /**
+ * Set a consumer where the worker will relay its output. This function can only be called once.
+ */
+ virtual void SetConsumer(THolder<IConsumer<const NKikimr::NUdf::TUnboxedValue*>>) = 0;
+
+ /**
+ * Push new value to the graph, then feed all new output to the consumer. Values cannot be pushed before
+ * assigning a consumer.
+ */
+ virtual void Push(NKikimr::NUdf::TUnboxedValue&&) = 0;
+
+ /**
+ * Send finish event and clear the computation graph. No new values will be accepted.
+ */
+ virtual void OnFinish() = 0;
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ /**
+ * Input specifications describe format for program input. They carry information about input data schema
+ * as well as the knowledge about how to convert input structures into unboxed values (data format which can be
+ * processed by the YQL runtime).
+ *
+ * Input spec defines the arguments of the program's Apply method. For example, a program
+ * with the protobuf input spec will accept a stream of protobuf messages while a program with the
+ * yson spec will accept an input stream (binary or text one).
+ *
+ * See documentation for input and output spec traits for hints on how to implement a custom specs.
+ */
+ class TInputSpecBase {
+ protected:
+     // Storage behind GetAllVirtualColumns(). It is mutable because the const getter
+     // sizes it on first use.
+     mutable TVector<THashMap<TString, NYT::TNode>> AllVirtualColumns_;
+
+ public:
+     virtual ~TInputSpecBase() = default;
+
+ public:
+     /**
+      * Return the input schemas in YQL format (NB: not the YT format). Every element of the
+      * returned vector must describe a struct.
+      *
+      * Each element looks roughly like this:
+      *
+      * @code
+      * [
+      *     'StructType',
+      *     [
+      *         ["Field1Name", ["DataType", "Int32"]],
+      *         ["Field2Name", ["DataType", "String"]],
+      *         ...
+      *     ]
+      * ]
+      * @endcode
+      */
+     virtual const TVector<NYT::TNode>& GetSchemas() const = 0;
+     // TODO: make a neat schema builder
+
+     /**
+      * Return the virtual columns of every input.
+      *
+      * Each map goes from a column name to that column's data schema in YQL format.
+      * While the storage is empty it is sized to one (empty) map per schema reported
+      * by GetSchemas().
+      */
+     const TVector<THashMap<TString, NYT::TNode>>& GetAllVirtualColumns() const {
+         auto& columns = AllVirtualColumns_;
+         if (columns.empty()) {
+             // One default-constructed map per input schema.
+             columns.resize(GetSchemas().size());
+         }
+         return columns;
+     }
+ };
+
+ /**
+ * Output specifications describe format for program output. Like input specifications, they carry knowledge
+ * about program output type and how to convert unboxed values into that type.
+ */
+ class TOutputSpecBase {
+ private:
+ // Optional set of column names to keep in the output; empty TMaybe means no filter was set.
+ TMaybe<THashSet<TString>> OutputColumnsFilter_;
+
+ public:
+ virtual ~TOutputSpecBase() = default;
+
+ public:
+ /**
+ * Get output data schema in YQL format (NB: not a YT format). The returned value must describe a structure
+ * or a variant made of structures for multi-table outputs (note: not all specs support multi-table output).
+ *
+ * See docs for the input spec's GetSchemas().
+ *
+ * Also TNode entity could be returned (NYT::TNode::CreateEntity()),
+ * in which case output schema would be inferred from query and could be
+ * obtained by Program::GetOutputSchema() call.
+ */
+ virtual const NYT::TNode& GetSchema() const = 0;
+
+ /**
+ * Get an output columns filter.
+ *
+ * Output columns filter is a set of column names that should be left in the output. All columns that are
+ * not in this set will not be calculated. Depending on the output schema, they will be either removed
+ * completely (for optional columns) or filled with defaults (for required columns).
+ */
+ const TMaybe<THashSet<TString>>& GetOutputColumnsFilter() const {
+ return OutputColumnsFilter_;
+ }
+
+ /**
+ * Set new output columns filter. The passed value is copied into this spec and replaces
+ * any previously stored filter.
+ */
+ void SetOutputColumnsFilter(const TMaybe<THashSet<TString>>& outputColumnsFilter) {
+ OutputColumnsFilter_ = outputColumnsFilter;
+ }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ /**
+ * Input spec traits provide information on how to process program input.
+ *
+ * Each input spec should create a template specialization for this class, in which it should provide several
+ * static variables and functions.
+ *
+ * For example, a hypothetical example of implementing a JSON input spec would look like this:
+ *
+ * @code
+ * class TJsonInputSpec: public TInputSpecBase {
+ * // whatever magic you require for this spec
+ * };
+ *
+ * template <>
+ * class TInputSpecTraits<TJsonInputSpec> {
+ * // write here four constants, one typedef and three static functions described below
+ * };
+ * @endcode
+ *
+ * @tparam T input spec type.
+ */
+ template <typename T>
+ struct TInputSpecTraits {
+ /// Safety flag which should be set to false in all template specializations of this class. Attempt to
+ /// build a program using a spec with `IsPartial=true` will result in compilation error.
+ static const constexpr bool IsPartial = true;
+
+ /// Indicates whether this spec supports pull stream mode.
+ static const constexpr bool SupportPullStreamMode = false;
+ /// Indicates whether this spec supports pull list mode.
+ static const constexpr bool SupportPullListMode = false;
+ /// Indicates whether this spec supports push stream mode.
+ static const constexpr bool SupportPushStreamMode = false;
+
+ /// For push mode, indicates the return type of the builder's Process function.
+ using TConsumerType = void;
+
+ /// For pull stream mode, should take an input spec, a pull stream worker and whatever the user passed
+ /// to the program's Apply function, create unboxed values with custom stream implementations
+ /// and pass them to the worker's SetInput function for each input.
+ /// This primary-template version is a stub that must never be called.
+ template <typename ...A>
+ static void PreparePullStreamWorker(const T&, IPullStreamWorker*, A&&...) {
+ Y_UNREACHABLE();
+ }
+
+ /// For pull list mode, should take an input spec, a pull list worker and whatever the user passed
+ /// to the program's Apply function, create unboxed values with custom list implementations
+ /// and pass them to the worker's SetInput function for each input.
+ /// This primary-template version is a stub that must never be called.
+ template <typename ...A>
+ static void PreparePullListWorker(const T&, IPullListWorker*, A&&...) {
+ Y_UNREACHABLE();
+ }
+
+ /// For push stream mode, should take an input spec and a worker and create a consumer which will
+ /// be returned to the user. The consumer should keep the worker alive until its own destruction.
+ /// The return type of this function should exactly match the one defined in the TConsumerType typedef.
+ /// This primary-template version is a stub that must never be called.
+ static TConsumerType MakeConsumer(const T&, TWorkerHolder<IPushStreamWorker>) {
+ Y_UNREACHABLE();
+ }
+ };
+
+ /**
+ * Output spec traits provide information on how to process program output. Like with input specs, each output
+ * spec requires an appropriate template specialization of this class.
+ *
+ * @tparam T output spec type.
+ */
+ template <typename T>
+ struct TOutputSpecTraits {
+ /// Safety flag which should be set to false in all template specializations of this class. Attempt to
+ /// build a program using a spec with `IsPartial=true` will result in compilation error.
+ static const constexpr bool IsPartial = true;
+
+ /// Indicates whether this spec supports pull stream mode.
+ static const constexpr bool SupportPullStreamMode = false;
+ /// Indicates whether this spec supports pull list mode.
+ static const constexpr bool SupportPullListMode = false;
+ /// Indicates whether this spec supports push stream mode.
+ static const constexpr bool SupportPushStreamMode = false;
+
+ /// For pull stream mode, indicates the return type of the program's Apply function.
+ using TPullStreamReturnType = void;
+
+ /// For pull list mode, indicates the return type of the program's Apply function.
+ using TPullListReturnType = void;
+
+ /// For pull stream mode, should take an output spec and a worker and build a stream which will be returned
+ /// to the user. The return type of this function must match the one specified in TPullStreamReturnType.
+ /// This primary-template version is a stub that must never be called.
+ static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const T&, TWorkerHolder<IPullStreamWorker>) {
+ Y_UNREACHABLE();
+ }
+
+ /// For pull list mode, should take an output spec and a worker and build a list which will be returned
+ /// to the user. The return type of this function must match the one specified in TPullListReturnType.
+ /// This primary-template version is a stub that must never be called.
+ static TPullListReturnType ConvertPullListWorkerToOutputType(const T&, TWorkerHolder<IPullListWorker>) {
+ Y_UNREACHABLE();
+ }
+
+ /// For push stream mode, should take an output spec, a worker and whatever arguments the user passed
+ /// to the program's Apply function, create a consumer for unboxed values and pass it to the worker's
+ /// SetConsumer function.
+ /// This primary-template version is a stub that must never be called.
+ template <typename ...A>
+ static void SetConsumerToWorker(const T&, IPushStreamWorker*, A&&...) {
+ Y_UNREACHABLE();
+ }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Message builders for the static_asserts in the program classes. Each macro expands to a run of
+// adjacent string literals (concatenated by the compiler), so `spec_type` and `mode` must themselves
+// be string literals such as "input" or "pull stream".
+#define NOT_SPEC_MSG(spec_type) "passed class should be derived from " spec_type " spec base"
+#define PARTIAL_SPEC_MSG(spec_type) "this " spec_type " spec does not define its traits. Make sure you've passed " \
+ "an " spec_type " spec and not some other object; also make sure you've included " \
+ "all necessary headers. If you're developing a spec, make sure you have " \
+ "a spec traits template specialization"
+#define UNSUPPORTED_MODE_MSG(spec_type, mode) "this " spec_type " spec does not support " mode " mode"
+
+ /**
+  * Type-erased view of a compiled program: gives access to the specs, to the sets of used
+  * input columns, to the input/output schemas and to compilation issues.
+  */
+ class IProgram {
+ public:
+     virtual ~IProgram() = default;
+
+ public:
+     /// Input spec of this program, as its base class.
+     virtual const TInputSpecBase& GetInputSpecBase() const = 0;
+     /// Output spec of this program, as its base class.
+     virtual const TOutputSpecBase& GetOutputSpecBase() const = 0;
+     /// Columns used from the input with the given index.
+     virtual const THashSet<TString>& GetUsedColumns(ui32) const = 0;
+     /// Overload of GetUsedColumns that takes no input index.
+     virtual const THashSet<TString>& GetUsedColumns() const = 0;
+     virtual NYT::TNode MakeInputSchema(ui32) const = 0;
+     virtual NYT::TNode MakeInputSchema() const = 0;
+     virtual NYT::TNode MakeOutputSchema() const = 0;
+     virtual NYT::TNode MakeOutputSchema(ui32) const = 0;
+     virtual NYT::TNode MakeOutputSchema(TStringBuf) const = 0;
+     virtual NYT::TNode MakeFullOutputSchema() const = 0;
+     virtual TIssues GetIssues() const = 0;
+     virtual TString GetCompiledProgram() = 0;
+
+     /**
+      * Insert the columns used from input `inputIndex` into `columns`; entries already present
+      * in `columns` are kept. Declared const because it only reads this program through the
+      * const GetUsedColumns accessor, so it is also callable on a const IProgram.
+      */
+     inline void MergeUsedColumns(THashSet<TString>& columns, ui32 inputIndex) const {
+         const auto& usedColumns = GetUsedColumns(inputIndex);
+         columns.insert(usedColumns.begin(), usedColumns.end());
+     }
+
+     /**
+      * Insert the columns reported by the index-less GetUsedColumns() into `columns`; entries
+      * already present in `columns` are kept. Const for the same reason as the overload above.
+      */
+     inline void MergeUsedColumns(THashSet<TString>& columns) const {
+         const auto& usedColumns = GetUsedColumns();
+         columns.insert(usedColumns.begin(), usedColumns.end());
+     }
+ };
+
+ /**
+  * Shared implementation of IProgram: stores the concrete input/output specs and forwards every
+  * schema/issue/column query to the worker factory.
+  *
+  * @tparam TInputSpec    input spec type, must derive from TInputSpecBase.
+  * @tparam TOutputSpec   output spec type, must derive from TOutputSpecBase.
+  * @tparam WorkerFactory worker factory type the queries are delegated to.
+  */
+ template <typename TInputSpec, typename TOutputSpec, typename WorkerFactory>
+ class TProgramCommon: public IProgram {
+     static_assert(std::is_base_of<TInputSpecBase, TInputSpec>::value, NOT_SPEC_MSG("input"));
+     static_assert(std::is_base_of<TOutputSpecBase, TOutputSpec>::value, NOT_SPEC_MSG("output"));
+
+ protected:
+     TInputSpec InputSpec_;
+     TOutputSpec OutputSpec_;
+     std::shared_ptr<WorkerFactory> WorkerFactory_;
+
+ public:
+     /**
+      * All three arguments are taken by value and moved into the members, so passing
+      * temporaries does not cost an extra copy of the specs.
+      */
+     explicit TProgramCommon(
+         TInputSpec inputSpec,
+         TOutputSpec outputSpec,
+         std::shared_ptr<WorkerFactory> workerFactory
+     )
+         : InputSpec_(std::move(inputSpec))
+         , OutputSpec_(std::move(outputSpec))
+         , WorkerFactory_(std::move(workerFactory))
+     {
+     }
+
+ public:
+     /// Input spec with its concrete type.
+     const TInputSpec& GetInputSpec() const {
+         return InputSpec_;
+     }
+
+     /// Output spec with its concrete type.
+     const TOutputSpec& GetOutputSpec() const {
+         return OutputSpec_;
+     }
+
+     const TInputSpecBase& GetInputSpecBase() const override {
+         return InputSpec_;
+     }
+
+     const TOutputSpecBase& GetOutputSpecBase() const override {
+         return OutputSpec_;
+     }
+
+     // Everything below simply delegates to the worker factory.
+
+     const THashSet<TString>& GetUsedColumns(ui32 inputIndex) const override {
+         return WorkerFactory_->GetUsedColumns(inputIndex);
+     }
+
+     const THashSet<TString>& GetUsedColumns() const override {
+         return WorkerFactory_->GetUsedColumns();
+     }
+
+     NYT::TNode MakeInputSchema(ui32 inputIndex) const override {
+         return WorkerFactory_->MakeInputSchema(inputIndex);
+     }
+
+     NYT::TNode MakeInputSchema() const override {
+         return WorkerFactory_->MakeInputSchema();
+     }
+
+     NYT::TNode MakeOutputSchema() const override {
+         return WorkerFactory_->MakeOutputSchema();
+     }
+
+     NYT::TNode MakeOutputSchema(ui32 outputIndex) const override {
+         return WorkerFactory_->MakeOutputSchema(outputIndex);
+     }
+
+     NYT::TNode MakeOutputSchema(TStringBuf outputName) const override {
+         return WorkerFactory_->MakeOutputSchema(outputName);
+     }
+
+     NYT::TNode MakeFullOutputSchema() const override {
+         return WorkerFactory_->MakeFullOutputSchema();
+     }
+
+     TIssues GetIssues() const override {
+         return WorkerFactory_->GetIssues();
+     }
+
+     TString GetCompiledProgram() override {
+         return WorkerFactory_->GetCompiledProgram();
+     }
+ };
+
+ /**
+  * Program that runs in pull stream mode. Apply() makes a worker, lets the input spec traits
+  * attach the user's input to it, and returns whatever the output spec traits build from it.
+  */
+ template <typename TInputSpec, typename TOutputSpec>
+ class TPullStreamProgram final: public TProgramCommon<TInputSpec, TOutputSpec, IPullStreamWorkerFactory> {
+     using TBase = TProgramCommon<TInputSpec, TOutputSpec, IPullStreamWorkerFactory>;
+     using TInputTraits = TInputSpecTraits<TInputSpec>;
+     using TOutputTraits = TOutputSpecTraits<TOutputSpec>;
+
+     using TBase::WorkerFactory_;
+     using TBase::InputSpec_;
+     using TBase::OutputSpec_;
+
+ public:
+     using TBase::TBase;
+
+ public:
+     /// Arguments are forwarded untouched to the input spec traits.
+     template <typename ...TArgs>
+     typename TOutputTraits::TPullStreamReturnType Apply(TArgs&& ... args) {
+         static_assert(!TInputTraits::IsPartial, PARTIAL_SPEC_MSG("input"));
+         static_assert(!TOutputTraits::IsPartial, PARTIAL_SPEC_MSG("output"));
+         static_assert(TInputTraits::SupportPullStreamMode, UNSUPPORTED_MODE_MSG("input", "pull stream"));
+         static_assert(TOutputTraits::SupportPullStreamMode, UNSUPPORTED_MODE_MSG("output", "pull stream"));
+
+         auto holder = WorkerFactory_->MakeWorker();
+         TInputTraits::PreparePullStreamWorker(InputSpec_, holder.Get(), std::forward<TArgs>(args)...);
+         return TOutputTraits::ConvertPullStreamWorkerToOutputType(OutputSpec_, std::move(holder));
+     }
+ };
+
+ /**
+  * Program that runs in pull list mode. Apply() makes a worker, lets the input spec traits
+  * attach the user's input to it, and returns whatever the output spec traits build from it.
+  */
+ template <typename TInputSpec, typename TOutputSpec>
+ class TPullListProgram final: public TProgramCommon<TInputSpec, TOutputSpec, IPullListWorkerFactory> {
+     using TBase = TProgramCommon<TInputSpec, TOutputSpec, IPullListWorkerFactory>;
+     using TInputTraits = TInputSpecTraits<TInputSpec>;
+     using TOutputTraits = TOutputSpecTraits<TOutputSpec>;
+
+     using TBase::WorkerFactory_;
+     using TBase::InputSpec_;
+     using TBase::OutputSpec_;
+
+ public:
+     using TBase::TBase;
+
+ public:
+     /// Arguments are forwarded untouched to the input spec traits.
+     template <typename ...TArgs>
+     typename TOutputTraits::TPullListReturnType Apply(TArgs&& ... args) {
+         static_assert(!TInputTraits::IsPartial, PARTIAL_SPEC_MSG("input"));
+         static_assert(!TOutputTraits::IsPartial, PARTIAL_SPEC_MSG("output"));
+         static_assert(TInputTraits::SupportPullListMode, UNSUPPORTED_MODE_MSG("input", "pull list"));
+         static_assert(TOutputTraits::SupportPullListMode, UNSUPPORTED_MODE_MSG("output", "pull list"));
+
+         auto holder = WorkerFactory_->MakeWorker();
+         TInputTraits::PreparePullListWorker(InputSpec_, holder.Get(), std::forward<TArgs>(args)...);
+         return TOutputTraits::ConvertPullListWorkerToOutputType(OutputSpec_, std::move(holder));
+     }
+ };
+
+ /**
+  * Program that runs in push stream mode. Apply() makes a worker, lets the output spec traits
+  * attach a consumer built from the user's arguments, and returns the consumer that the input
+  * spec traits create around the worker.
+  */
+ template <typename TInputSpec, typename TOutputSpec>
+ class TPushStreamProgram final: public TProgramCommon<TInputSpec, TOutputSpec, IPushStreamWorkerFactory> {
+     using TBase = TProgramCommon<TInputSpec, TOutputSpec, IPushStreamWorkerFactory>;
+     using TInputTraits = TInputSpecTraits<TInputSpec>;
+     using TOutputTraits = TOutputSpecTraits<TOutputSpec>;
+
+     using TBase::WorkerFactory_;
+     using TBase::InputSpec_;
+     using TBase::OutputSpec_;
+
+ public:
+     using TBase::TBase;
+
+ public:
+     /// Arguments are forwarded untouched to the output spec traits.
+     template <typename ...TArgs>
+     typename TInputTraits::TConsumerType Apply(TArgs&& ... args) {
+         static_assert(!TInputTraits::IsPartial, PARTIAL_SPEC_MSG("input"));
+         static_assert(!TOutputTraits::IsPartial, PARTIAL_SPEC_MSG("output"));
+         static_assert(TInputTraits::SupportPushStreamMode, UNSUPPORTED_MODE_MSG("input", "push stream"));
+         static_assert(TOutputTraits::SupportPushStreamMode, UNSUPPORTED_MODE_MSG("output", "push stream"));
+
+         auto holder = WorkerFactory_->MakeWorker();
+         TOutputTraits::SetConsumerToWorker(OutputSpec_, holder.Get(), std::forward<TArgs>(args)...);
+         return TInputTraits::MakeConsumer(InputSpec_, std::move(holder));
+     }
+ };
+
+#undef NOT_SPEC_MSG
+#undef PARTIAL_SPEC_MSG
+#undef UNSUPPORTED_MODE_MSG
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ /**
+ * Configure global logging facilities. Affects all YQL modules.
+ */
+ void ConfigureLogging(const TLoggingOptions& = {});
+
+ /**
+ * Create a new program factory.
+ * Custom logging initialization could be performed by a call to the ConfigureLogging method beforehand.
+ * If the ConfigureLogging method has not been called the default logging initialization will be performed.
+ */
+ IProgramFactoryPtr MakeProgramFactory(const TProgramFactoryOptions& = {});
+ }
+}
+
+Y_DECLARE_OUT_SPEC(inline, NYql::NPureCalc::TCompileError, stream, value) {
+ stream << value.AsStrBuf() << Endl << "Issues:" << Endl << value.GetIssues() << Endl << Endl << "Yql:" << Endl <<value.GetYql();
+}
diff --git a/ydb/library/yql/public/purecalc/common/logger_init.cpp b/ydb/library/yql/public/purecalc/common/logger_init.cpp
new file mode 100644
index 0000000000..b172eb1d03
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/logger_init.cpp
@@ -0,0 +1,32 @@
+#include "logger_init.h"
+
+#include <ydb/library/yql/utils/log/log.h>
+
+#include <atomic>
+
+namespace NYql {
+namespace NPureCalc {
+
+namespace {
+ std::atomic_bool Initialized;
+}
+
+ // Initialise the YQL logger from `options`: set up the destination, apply the requested level
+ // as the default priority and as the level of every component, then mark logging as initialised.
+ void InitLogging(const TLoggingOptions& options) {
+     NLog::InitLogger(options.LogDestination);
+
+     auto& yqlLogger = NLog::YqlLogger();
+     yqlLogger.SetDefaultPriority(options.LogLevel_);
+
+     // Components are addressed by their integer value in [0, MaxValue).
+     const auto componentCount = NLog::EComponentHelpers::ToInt(NLog::EComponent::MaxValue);
+     for (int component = 0; component < componentCount; ++component) {
+         yqlLogger.SetComponentLevel((NLog::EComponent) component, (NLog::ELevel) options.LogLevel_);
+     }
+
+     Initialized.store(true);
+ }
+
+ // Fall back to default logging options unless InitLogging() has already completed.
+ void EnsureLoggingInitialized() {
+     if (!Initialized.load()) {
+         InitLogging(TLoggingOptions());
+     }
+ }
+
+}
+}
diff --git a/ydb/library/yql/public/purecalc/common/logger_init.h b/ydb/library/yql/public/purecalc/common/logger_init.h
new file mode 100644
index 0000000000..039cbd4411
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/logger_init.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include "interface.h"
+
+namespace NYql {
+ namespace NPureCalc {
+ void InitLogging(const TLoggingOptions& options);
+ void EnsureLoggingInitialized();
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/common/names.cpp b/ydb/library/yql/public/purecalc/common/names.cpp
new file mode 100644
index 0000000000..551772842b
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/names.cpp
@@ -0,0 +1,16 @@
+#include "names.h"
+
+#include <util/generic/strbuf.h>
+
+namespace NYql::NPureCalc {
+ // Common prefix of system column names; PurecalcSysColumnTablePath below starts with it.
+ const TStringBuf PurecalcSysColumnsPrefix = "_yql_sys_";
+ const TStringBuf PurecalcSysColumnTablePath = "_yql_sys_tablepath";
+
+ // NOTE(review): presumably the cluster/service names purecalc uses when none are given - confirm at the use sites.
+ const TStringBuf PurecalcDefaultCluster = "view";
+ const TStringBuf PurecalcDefaultService = "data";
+
+ // Name of the callable used to get input data (default node name of MakeUsedColumnsExtractor).
+ const TStringBuf PurecalcInputCallableName = "Self";
+ const TStringBuf PurecalcInputTablePrefix = "Input";
+
+ // Prepended to the module name when a UDF module is registered via TProgramFactory::AddUdfModule.
+ const TStringBuf PurecalcUdfModulePrefix = "<purecalc>::";
+}
diff --git a/ydb/library/yql/public/purecalc/common/names.h b/ydb/library/yql/public/purecalc/common/names.h
new file mode 100644
index 0000000000..dc08ccd3d0
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/names.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <util/generic/fwd.h>
+
+namespace NYql::NPureCalc {
+ extern const TStringBuf PurecalcSysColumnsPrefix;
+ extern const TStringBuf PurecalcSysColumnTablePath;
+
+ extern const TStringBuf PurecalcDefaultCluster;
+ extern const TStringBuf PurecalcDefaultService;
+
+ extern const TStringBuf PurecalcInputCallableName;
+ extern const TStringBuf PurecalcInputTablePrefix;
+
+ extern const TStringBuf PurecalcUdfModulePrefix;
+}
diff --git a/ydb/library/yql/public/purecalc/common/processor_mode.cpp b/ydb/library/yql/public/purecalc/common/processor_mode.cpp
new file mode 100644
index 0000000000..957cc2d7f4
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/processor_mode.cpp
@@ -0,0 +1 @@
+#include "processor_mode.h"
diff --git a/ydb/library/yql/public/purecalc/common/processor_mode.h b/ydb/library/yql/public/purecalc/common/processor_mode.h
new file mode 100644
index 0000000000..9bec87cadc
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/processor_mode.h
@@ -0,0 +1,11 @@
+#pragma once
+
+namespace NYql {
+ namespace NPureCalc {
+ // Mode a program is executed in. The output aligner types the output as a list for PullList
+ // and as a stream for both PullStream and PushStream.
+ enum class EProcessorMode {
+ PullList,
+ PullStream,
+ PushStream
+ };
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/common/program_factory.cpp b/ydb/library/yql/public/purecalc/common/program_factory.cpp
new file mode 100644
index 0000000000..53b30f884b
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/program_factory.cpp
@@ -0,0 +1,144 @@
+#include "program_factory.h"
+#include "logger_init.h"
+#include "names.h"
+#include "worker_factory.h"
+
+#include <ydb/library/yql/utils/log/log.h>
+
+using namespace NYql;
+using namespace NYql::NPureCalc;
+
+// Build a factory from `options`: make sure logging is set up, resolve user modules, collect UDF
+// paths and create the function registry. Throws TCompileError if the module resolver could not be built.
+TProgramFactory::TProgramFactory(const TProgramFactoryOptions& options)
+ : Options_(options)
+ , CountersProvider_(nullptr)
+{
+ // Uses default logging options unless logging was initialised earlier.
+ EnsureLoggingInitialized();
+
+ // NOTE(review): presumably fills Modules_ with the library items found in the user data - confirm.
+ NUserData::TUserData::UserDataToLibraries(Options_.UserData_, Modules_);
+
+ UserData_ = GetYqlModuleResolver(ExprContext_, ModuleResolver_, Options_.UserData_, {}, {});
+
+ // A null resolver means module compilation failed; report the accumulated issues.
+ if (!ModuleResolver_) {
+ ythrow TCompileError("", ExprContext_.IssueManager.GetIssues().ToString()) << "failed to compile modules";
+ }
+
+ // Paths of UDFs given as user data items that live on the filesystem.
+ TVector<TString> UDFsPaths;
+ for (const auto& item: Options_.UserData_) {
+ if (
+ item.Type_ == NUserData::EType::UDF &&
+ item.Disposition_ == NUserData::EDisposition::FILESYSTEM
+ ) {
+ UDFsPaths.push_back(item.Content_);
+ }
+ }
+
+ // Optionally add the UDFs found in the configured directory.
+ if (!Options_.UdfsDir_.empty()) {
+ NKikimr::NMiniKQL::FindUdfsInDir(Options_.UdfsDir_, &UDFsPaths);
+ }
+
+ // The registry is cloned so that this factory holds its own mutable copy (see AddUdfModule).
+ FuncRegistry_ = NKikimr::NMiniKQL::CreateFunctionRegistry(
+ &NYql::NBacktrace::KikimrBackTrace, NKikimr::NMiniKQL::CreateBuiltinRegistry(), false, UDFsPaths)->Clone();
+
+ NKikimr::NMiniKQL::FillStaticModules(*FuncRegistry_);
+}
+
+// Defined out of line; members clean up after themselves.
+TProgramFactory::~TProgramFactory() = default;
+
+// Register `module` in this factory's function registry. The first AddModule argument is
+// `moduleName` prefixed with PurecalcUdfModulePrefix, the second is the plain `moduleName`.
+void TProgramFactory::AddUdfModule(
+    const TStringBuf& moduleName,
+    NKikimr::NUdf::TUniquePtr<NKikimr::NUdf::IUdfModule>&& module
+) {
+    TString prefixedName = TString::Join(PurecalcUdfModulePrefix, moduleName);
+    FuncRegistry_->AddModule(std::move(prefixedName), moduleName, std::move(module));
+}
+
+// Remember the counters provider; the raw pointer is stored as is and later handed to the
+// worker factory options created by the Make*WorkerFactory methods.
+// NOTE(review): ownership is not taken here, so the provider presumably must outlive the factory - confirm.
+void TProgramFactory::SetCountersProvider(NKikimr::NUdf::ICountersProvider* provider) {
+ CountersProvider_ = provider;
+}
+
+// Create a pull stream worker factory for `query`. The options bundle this factory (held through
+// an intrusive pointer), both specs, the shared registries and the relevant factory options.
+// `query` is received by value, so it is moved into the options rather than copied once more.
+IPullStreamWorkerFactoryPtr TProgramFactory::MakePullStreamWorkerFactory(
+    const TInputSpecBase& inputSpec,
+    const TOutputSpecBase& outputSpec,
+    TString query,
+    ETranslationMode mode,
+    ui16 syntaxVersion
+) {
+    return std::make_shared<TPullStreamWorkerFactory>(TWorkerFactoryOptions(
+        TIntrusivePtr<TProgramFactory>(this),
+        inputSpec,
+        outputSpec,
+        std::move(query),
+        FuncRegistry_,
+        ModuleResolver_,
+        UserData_,
+        Modules_,
+        Options_.LLVMSettings,
+        CountersProvider_,
+        mode,
+        syntaxVersion,
+        Options_.NativeYtTypeFlags,
+        Options_.DeterministicTimeProviderSeed,
+        Options_.UseSystemColumns,
+        Options_.UseWorkerPool
+    ));
+}
+
+// Create a pull list worker factory for `query`. The options bundle this factory (held through
+// an intrusive pointer), both specs, the shared registries and the relevant factory options.
+// `query` is received by value, so it is moved into the options rather than copied once more.
+IPullListWorkerFactoryPtr TProgramFactory::MakePullListWorkerFactory(
+    const TInputSpecBase& inputSpec,
+    const TOutputSpecBase& outputSpec,
+    TString query,
+    ETranslationMode mode,
+    ui16 syntaxVersion
+) {
+    return std::make_shared<TPullListWorkerFactory>(TWorkerFactoryOptions(
+        TIntrusivePtr<TProgramFactory>(this),
+        inputSpec,
+        outputSpec,
+        std::move(query),
+        FuncRegistry_,
+        ModuleResolver_,
+        UserData_,
+        Modules_,
+        Options_.LLVMSettings,
+        CountersProvider_,
+        mode,
+        syntaxVersion,
+        Options_.NativeYtTypeFlags,
+        Options_.DeterministicTimeProviderSeed,
+        Options_.UseSystemColumns,
+        Options_.UseWorkerPool
+    ));
+}
+
+// Create a push stream worker factory for `query`. Throws yexception when the input spec reports
+// more than one schema, because push stream mode accepts a single input only.
+// `query` is received by value, so it is moved into the options rather than copied once more.
+IPushStreamWorkerFactoryPtr TProgramFactory::MakePushStreamWorkerFactory(
+    const TInputSpecBase& inputSpec,
+    const TOutputSpecBase& outputSpec,
+    TString query,
+    ETranslationMode mode,
+    ui16 syntaxVersion
+) {
+    if (inputSpec.GetSchemas().size() > 1) {
+        ythrow yexception() << "push stream mode doesn't support several inputs";
+    }
+
+    return std::make_shared<TPushStreamWorkerFactory>(TWorkerFactoryOptions(
+        TIntrusivePtr<TProgramFactory>(this),
+        inputSpec,
+        outputSpec,
+        std::move(query),
+        FuncRegistry_,
+        ModuleResolver_,
+        UserData_,
+        Modules_,
+        Options_.LLVMSettings,
+        CountersProvider_,
+        mode,
+        syntaxVersion,
+        Options_.NativeYtTypeFlags,
+        Options_.DeterministicTimeProviderSeed,
+        Options_.UseSystemColumns,
+        Options_.UseWorkerPool
+    ));
+}
diff --git a/ydb/library/yql/public/purecalc/common/program_factory.h b/ydb/library/yql/public/purecalc/common/program_factory.h
new file mode 100644
index 0000000000..d1402c21fd
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/program_factory.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include "interface.h"
+
+#include <ydb/library/yql/utils/backtrace/backtrace.h>
+#include <ydb/library/yql/core/services/mounts/yql_mounts.h>
+
+#include <ydb/library/yql/ast/yql_expr.h>
+#include <ydb/library/yql/core/yql_user_data.h>
+#include <ydb/library/yql/minikql/mkql_function_registry.h>
+#include <ydb/library/yql/minikql/invoke_builtins/mkql_builtins.h>
+
+#include <util/generic/function.h>
+#include <util/generic/ptr.h>
+#include <util/generic/strbuf.h>
+
+namespace NYql {
+ namespace NPureCalc {
+ // Implementation of IProgramFactory. It holds the options, the expression context, the function
+ // registry, the module resolver and the user data shared by the worker factories it creates.
+ class TProgramFactory: public IProgramFactory {
+ private:
+ // Copy of the options passed to the constructor.
+ TProgramFactoryOptions Options_;
+ TExprContext ExprContext_;
+ // Mutable registry; AddUdfModule() registers extra modules in it.
+ TIntrusivePtr<NKikimr::NMiniKQL::IMutableFunctionRegistry> FuncRegistry_;
+ IModuleResolver::TPtr ModuleResolver_;
+ TUserDataTable UserData_;
+ THashMap<TString, TString> Modules_;
+ // Raw pointer set by SetCountersProvider(); nullptr until then.
+ NKikimr::NUdf::ICountersProvider* CountersProvider_;
+
+ public:
+ explicit TProgramFactory(const TProgramFactoryOptions&);
+ ~TProgramFactory() override;
+
+ public:
+ void AddUdfModule(
+ const TStringBuf& moduleName,
+ NKikimr::NUdf::TUniquePtr<NKikimr::NUdf::IUdfModule>&& module
+ ) override;
+
+ void SetCountersProvider(NKikimr::NUdf::ICountersProvider* provider) override;
+
+ // Arguments of the three methods below: input spec, output spec, query text, translation mode, syntax version.
+ IPullStreamWorkerFactoryPtr MakePullStreamWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) override;
+ IPullListWorkerFactoryPtr MakePullListWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) override;
+ IPushStreamWorkerFactoryPtr MakePushStreamWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) override;
+ };
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.cpp b/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.cpp
new file mode 100644
index 0000000000..b339488cbd
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.cpp
@@ -0,0 +1,93 @@
+#include "align_output_schema.h"
+
+#include <ydb/library/yql/public/purecalc/common/type_from_schema.h>
+
+#include <ydb/library/yql/core/yql_expr_type_annotation.h>
+
+using namespace NYql;
+using namespace NYql::NPureCalc;
+
+namespace {
+ // Transformer that validates the item type of the expression's output against OutputStruct_
+ // and, when an output struct is given, converts the whole output to the expected list or
+ // stream type for the processor mode.
+ class TOutputAligner : public TSyncTransformerBase {
+ private:
+ // Expected item type of the output; may be null, in which case no conversion is attempted.
+ const TTypeAnnotationNode* OutputStruct_;
+ EProcessorMode ProcessorMode_;
+
+ public:
+ explicit TOutputAligner(const TTypeAnnotationNode* outputStruct, EProcessorMode processorMode)
+ : OutputStruct_(outputStruct)
+ , ProcessorMode_(processorMode)
+ {
+ }
+
+ public:
+ // Returns Error when validation of the item type fails, Ok when there is nothing to convert,
+ // otherwise the status of TryConvertTo (with Repeat rebuilt as described below).
+ TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final {
+ output = input;
+
+ const auto* expectedType = MakeExpectedType(ctx);
+ const auto* expectedItemType = MakeExpectedItemType();
+ const auto* actualType = MakeActualType(input);
+ const auto* actualItemType = MakeActualItemType(input);
+
+ // NOTE(review): this runs before the null check below, so expectedItemType is null here when
+ // no output struct was given - confirm ValidateOutputType tolerates that.
+ if (!ValidateOutputType(actualItemType, expectedItemType, ctx)) {
+ return TStatus::Error;
+ }
+
+ if (!expectedType) {
+ return TStatus::Ok;
+ }
+
+ auto status = TryConvertTo(output, *actualType, *expectedType, ctx);
+
+ // NOTE(review): the second constructor argument presumably asks for a pipeline restart - confirm against TStatus.
+ if (status.Level == IGraphTransformer::TStatus::Repeat) {
+ status = IGraphTransformer::TStatus(IGraphTransformer::TStatus::Repeat, true);
+ }
+
+ return status;
+ }
+
+ // No per-run state to reset.
+ void Rewind() final {
+ }
+
+ private:
+ // Null without an output struct; List<OutputStruct_> for PullList; Stream<OutputStruct_> for
+ // PullStream and PushStream. Any other enum value hits Y_FAIL.
+ const TTypeAnnotationNode* MakeExpectedType(TExprContext& ctx) {
+ if (!OutputStruct_) {
+ return nullptr;
+ }
+
+ switch (ProcessorMode_) {
+ case EProcessorMode::PullList:
+ return ctx.MakeType<TListExprType>(OutputStruct_);
+ case EProcessorMode::PullStream:
+ case EProcessorMode::PushStream:
+ return ctx.MakeType<TStreamExprType>(OutputStruct_);
+ }
+
+ Y_FAIL("Unexpected");
+ }
+
+ const TTypeAnnotationNode* MakeExpectedItemType() {
+ return OutputStruct_;
+ }
+
+ // Type annotation of the input expression.
+ const TTypeAnnotationNode* MakeActualType(TExprNode::TPtr& input) {
+ return input->GetTypeAnn();
+ }
+
+ // Item type of the input's stream or list type; any other kind hits Y_FAIL.
+ const TTypeAnnotationNode* MakeActualItemType(TExprNode::TPtr& input) {
+ auto actualType = MakeActualType(input);
+ switch (actualType->GetKind()) {
+ case ETypeAnnotationKind::Stream:
+ return actualType->Cast<TStreamExprType>()->GetItemType();
+ case ETypeAnnotationKind::List:
+ return actualType->Cast<TListExprType>()->GetItemType();
+ default:
+ Y_FAIL("unexpected return type");
+ }
+ }
+ };
+}
+
+// Allocate a TOutputAligner for the given output struct type and processor mode; the returned
+// TAutoPtr owns it.
+TAutoPtr<IGraphTransformer> NYql::NPureCalc::MakeOutputAligner(const TTypeAnnotationNode* outputStruct, EProcessorMode processorMode) {
+ return new TOutputAligner(outputStruct, processorMode);
+}
diff --git a/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.h b/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.h
new file mode 100644
index 0000000000..667a50ebf1
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <ydb/library/yql/public/purecalc/common/processor_mode.h>
+
+#include <ydb/library/yql/core/yql_graph_transformer.h>
+#include <ydb/library/yql/core/yql_type_annotation.h>
+
+namespace NYql {
+ namespace NPureCalc {
+ /**
+ * A transformer which converts an output type of the expression to the given type or reports an error.
+ *
+ * @param outputStruct destination output struct type.
+ * @param processorMode processor mode the program runs in (pull list, pull stream or push stream).
+ * @return a graph transformer for type alignment.
+ */
+ TAutoPtr<IGraphTransformer> MakeOutputAligner(
+ const TTypeAnnotationNode* outputStruct,
+ EProcessorMode processorMode
+ );
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.cpp b/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.cpp
new file mode 100644
index 0000000000..20cffb0112
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.cpp
@@ -0,0 +1,96 @@
+#include "extract_used_columns.h"
+
+#include <ydb/library/yql/public/purecalc/common/inspect_input.h>
+
+#include <ydb/library/yql/core/yql_expr_optimize.h>
+#include <ydb/library/yql/core/expr_nodes/yql_expr_nodes.h>
+
+using namespace NYql;
+using namespace NYql::NPureCalc;
+
+namespace {
+ // Transformer that fills *Destination_ with the set of columns used from each input. It starts
+ // from all columns and narrows an input's set to the members listed by an ExtractMembers node
+ // applied to the input callable named NodeName_.
+ class TUsedColumnsExtractor : public TSyncTransformerBase {
+ private:
+ TVector<THashSet<TString>>* const Destination_;
+ // Held by reference, so the caller's vector must outlive this transformer.
+ const TVector<THashSet<TString>>& AllColumns_;
+ TString NodeName_;
+
+ // Set once the columns were computed; cleared by Rewind().
+ bool CalculatedUsedFields_ = false;
+
+ public:
+ TUsedColumnsExtractor(
+ TVector<THashSet<TString>>* destination,
+ const TVector<THashSet<TString>>& allColumns,
+ TString nodeName
+ )
+ : Destination_(destination)
+ , AllColumns_(allColumns)
+ , NodeName_(std::move(nodeName))
+ {
+ }
+
+ // Deleted so that a temporary cannot be bound to the AllColumns_ reference.
+ TUsedColumnsExtractor(TVector<THashSet<TString>>*, TVector<THashSet<TString>>&&, TString) = delete;
+
+ public:
+ // Leaves the expression unchanged. Returns Ok without rescanning when the columns were already
+ // computed, Error when an input index could not be fetched, Ok otherwise.
+ TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final {
+ output = input;
+
+ if (CalculatedUsedFields_) {
+ return IGraphTransformer::TStatus::Ok;
+ }
+
+ bool hasError = false;
+
+ // Default: every column of every input counts as used.
+ *Destination_ = AllColumns_;
+
+ VisitExpr(input, [&](const TExprNode::TPtr& inputExpr) {
+ NNodes::TExprBase node(inputExpr);
+ if (auto maybeExtract = node.Maybe<NNodes::TCoExtractMembers>()) {
+ auto extract = maybeExtract.Cast();
+ const auto& arg = extract.Input().Ref();
+ if (arg.IsCallable(NodeName_)) {
+ ui32 inputIndex;
+ if (!TryFetchInputIndexFromSelf(arg, ctx, AllColumns_.size(), inputIndex)) {
+ hasError = true;
+ // NOTE(review): returning false presumably stops descending into this node - confirm against VisitExpr.
+ return false;
+ }
+
+ YQL_ENSURE(inputIndex < AllColumns_.size());
+
+ auto& destinationColumnsSet = (*Destination_)[inputIndex];
+ const auto& allColumnsSet = AllColumns_[inputIndex];
+
+ // The set is rebuilt for every matching node, so for a given input the last visited
+ // ExtractMembers decides the result.
+ destinationColumnsSet.clear();
+ for (const auto& columnAtom : extract.Members()) {
+ TString name = TString(columnAtom.Value());
+ YQL_ENSURE(allColumnsSet.contains(name), "unexpected column in the input struct");
+ destinationColumnsSet.insert(name);
+ }
+ }
+ }
+
+ return true;
+ });
+
+ if (hasError) {
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ CalculatedUsedFields_ = true;
+
+ return IGraphTransformer::TStatus::Ok;
+ }
+
+ // Allow the next DoTransform() call to recompute the columns.
+ void Rewind() final {
+ CalculatedUsedFields_ = false;
+ }
+ };
+}
+
+// Allocate a TUsedColumnsExtractor; the returned TAutoPtr owns it. `allColumns` is kept by
+// reference inside the transformer, so it must stay alive for as long as the transformer is used.
+TAutoPtr<IGraphTransformer> NYql::NPureCalc::MakeUsedColumnsExtractor(
+ TVector<THashSet<TString>>* destination,
+ const TVector<THashSet<TString>>& allColumns,
+ const TString& nodeName
+) {
+ return new TUsedColumnsExtractor(destination, allColumns, nodeName);
+}
diff --git a/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.h b/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.h
new file mode 100644
index 0000000000..0199be46f7
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include <ydb/library/yql/public/purecalc/common/names.h>
+
+#include <ydb/library/yql/core/yql_graph_transformer.h>
+#include <ydb/library/yql/core/yql_type_annotation.h>
+
+#include <util/generic/hash_set.h>
+#include <util/generic/string.h>
+
+namespace NYql {
+ namespace NPureCalc {
+ /**
+ * Make transformation which builds sets of input columns from the given expression.
+ *
+ * @param destination a vector of string sets which will be populated with column names sets when
+ * transformation pipeline is launched. This pointer should contain a valid
+ * TVector<THashSet> instance. The transformation will overwrite its contents.
+ * @param allColumns vector of sets with all available columns for each input.
+ * @param nodeName name of the callable used to get input data, e.g. `Self`.
+ * @return an extractor which scans the contents of the input structs and populates destination.
+ */
+ TAutoPtr<IGraphTransformer> MakeUsedColumnsExtractor(
+ TVector<THashSet<TString>>* destination,
+ const TVector<THashSet<TString>>& allColumns,
+ const TString& nodeName = TString{PurecalcInputCallableName}
+ );
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.cpp b/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.cpp
new file mode 100644
index 0000000000..c6eaf01139
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.cpp
@@ -0,0 +1,100 @@
+#include "output_columns_filter.h"
+
+#include <ydb/library/yql/core/yql_expr_type_annotation.h>
+
+using namespace NYql;
+using namespace NYql::NPureCalc;
+
+namespace {
+ /**
+  * Synchronous transformer that wraps the program result into a `Map` whose lambda rebuilds
+  * every output row: columns listed in the filter are copied with `Member`, all other columns
+  * are replaced with `Nothing` (optional types) or `Default` (other types).
+  *
+  * The wrapper is added at most once between two Rewind() calls. When the filter is not set,
+  * the transformer is a no-op.
+  */
+ class TOutputColumnsFilter: public TSyncTransformerBase {
+ private:
+     // Names of the columns to keep; an empty TMaybe disables filtering altogether.
+     TMaybe<THashSet<TString>> Filter_;
+     // True once the Map wrapper has been added; reset by Rewind().
+     bool Fired_;
+
+ public:
+     explicit TOutputColumnsFilter(TMaybe<THashSet<TString>> filter)
+         : Filter_(std::move(filter))
+         , Fired_(false)
+     {
+     }
+
+ public:
+     // Allows the wrapper to be added again on the next DoTransform call.
+     void Rewind() override {
+         Fired_ = false;
+     }
+
+     // Returns Ok when there is nothing to do, Error when the row type is not a struct,
+     // and Repeat after the expression has been wrapped into the filtering Map.
+     TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final {
+         output = input;
+
+         if (Fired_ || Filter_.Empty()) {
+             return IGraphTransformer::TStatus::Ok;
+         }
+
+         // The program result must be a stream or a list; take the type of its items.
+         const TTypeAnnotationNode* returnType = output->GetTypeAnn();
+         const TTypeAnnotationNode* returnItemType = nullptr;
+         switch (returnType->GetKind()) {
+             case ETypeAnnotationKind::Stream:
+                 returnItemType = returnType->Cast<TStreamExprType>()->GetItemType();
+                 break;
+             case ETypeAnnotationKind::List:
+                 returnItemType = returnType->Cast<TListExprType>()->GetItemType();
+                 break;
+             default:
+                 Y_FAIL("unexpected return type");
+         }
+
+         if (returnItemType->GetKind() != ETypeAnnotationKind::Struct) {
+             ctx.AddError(TIssue(ctx.GetPosition(output->Pos()), "columns filter only supported for single-output programs"));
+             // Stop here: the item type is not a struct, so the cast below must not be reached.
+             return IGraphTransformer::TStatus::Error;
+         }
+
+         const auto* returnItemStruct = returnItemType->Cast<TStructExprType>();
+
+         // Build one (name, value) pair per output column of the lambda `row -> AsStruct(...)`.
+         auto arg = ctx.NewArgument(TPositionHandle(), "row");
+         TExprNode::TListType asStructItems;
+         for (const auto& x : returnItemStruct->GetItems()) {
+             TExprNode::TPtr value;
+             if (Filter_->contains(x->GetName())) {
+                 // Requested column: pass the original value through.
+                 value = ctx.Builder({})
+                     .Callable("Member")
+                         .Add(0, arg)
+                         .Atom(1, x->GetName())
+                     .Seal()
+                     .Build();
+             } else {
+                 // Filtered-out column: keep the column but replace its value with an empty one.
+                 auto type = x->GetItemType();
+                 value = ctx.Builder({})
+                     .Callable(type->GetKind() == ETypeAnnotationKind::Optional ? "Nothing" : "Default")
+                         .Add(0, ExpandType({}, *type, ctx))
+                     .Seal()
+                     .Build();
+             }
+
+             auto item = ctx.Builder({})
+                 .List()
+                     .Atom(0, x->GetName())
+                     .Add(1, value)
+                 .Seal()
+                 .Build();
+
+             asStructItems.push_back(item);
+         }
+
+         auto body = ctx.NewCallable(TPositionHandle(), "AsStruct", std::move(asStructItems));
+         auto lambda = ctx.NewLambda(TPositionHandle(), ctx.NewArguments(TPositionHandle(), {arg}), std::move(body));
+         output = ctx.Builder(TPositionHandle())
+             .Callable("Map")
+                 .Add(0, output)
+                 .Add(1, lambda)
+             .Seal()
+             .Build();
+
+         Fired_ = true;
+
+         return IGraphTransformer::TStatus(IGraphTransformer::TStatus::Repeat, true);
+     }
+ };
+}
+
+// Factory for the output columns filter: the given (possibly empty) column set is copied
+// into a newly allocated TOutputColumnsFilter.
+TAutoPtr<IGraphTransformer> NYql::NPureCalc::MakeOutputColumnsFilter(const TMaybe<THashSet<TString>>& columns) {
+ return new TOutputColumnsFilter(columns);
+}
diff --git a/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.h b/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.h
new file mode 100644
index 0000000000..1e86ae5276
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include <ydb/library/yql/public/purecalc/common/processor_mode.h>
+
+#include <ydb/library/yql/core/yql_graph_transformer.h>
+#include <ydb/library/yql/core/yql_type_annotation.h>
+
+namespace NYql {
+ namespace NPureCalc {
+ /**
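+ * A transformer which blanks out unwanted columns in the output.
+ *
+ * @param columns names of the columns to keep; every other output column is replaced with an empty
+ * value (`Nothing` for optional types, `Default` otherwise). If not set, the output is left as is.
+ * @return a graph transformer for filtering output.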
+ */
+ TAutoPtr<IGraphTransformer> MakeOutputColumnsFilter(const TMaybe<THashSet<TString>>& columns);
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.cpp b/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.cpp
new file mode 100644
index 0000000000..20b7eaa174
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.cpp
@@ -0,0 +1,216 @@
+#include "replace_table_reads.h"
+
+#include <ydb/library/yql/public/purecalc/common/names.h>
+
+#include <ydb/library/yql/core/yql_expr_optimize.h>
+#include <ydb/library/yql/core/yql_expr_type_annotation.h>
+
+using namespace NYql;
+using namespace NYql::NPureCalc;
+
+namespace {
+ class TTableReadsReplacer: public TSyncTransformerBase {
+ private:
+ ui32 InputsNumber_;
+ bool UseSystemColumns_;
+ TString TablePrefix_;
+ TString CallableName_;
+ bool Complete_ = false;
+
+ public:
+ // Stores the configuration of the replacer:
+ //  - inputsNumber: number of program inputs (used when expanding `DataTables`);
+ //  - useSystemColumns: when set, reads by table key get an extra table-path member added to each row;
+ //  - tablePrefix: prefix every table name must start with;
+ //  - inputNodeName: name of the callable generated in place of a table read.
+ explicit TTableReadsReplacer(
+ ui32 inputsNumber,
+ bool useSystemColumns,
+ TString tablePrefix,
+ TString inputNodeName
+ )
+ : InputsNumber_(inputsNumber)
+ , UseSystemColumns_(useSystemColumns)
+ , TablePrefix_(std::move(tablePrefix))
+ , CallableName_(std::move(inputNodeName))
+ {
+ }
+
+ // Deleted overload taking an rvalue vector of struct types.
+ // NOTE(review): this signature does not mirror the constructor above (ui32, bool, TString, TString);
+ // it looks like a leftover from an earlier interface - confirm whether it is still needed.
+ TTableReadsReplacer(TVector<const TStructExprType*>&&, TString, TString) = delete;
+
+ public:
+ // Rewrites every `Right!` callable in the expression into the node built by BuildInputFromRead
+ // from its first argument, which must be a `Read!` callable. Once OptimizeExpr reports Ok the
+ // transformer is marked complete and later calls are no-ops until Rewind().
+ TStatus DoTransform(const TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final {
+     output = input;
+     if (Complete_) {
+         return TStatus::Ok;
+     }
+
+     // Per-node callback: anything that is not a Right! callable is left untouched.
+     auto replaceRight = [&](const TExprNode::TPtr& candidate, TExprContext& lambdaCtx) -> TExprNode::TPtr {
+         if (!candidate->IsCallable(NNodes::TCoRight::CallableName())) {
+             return candidate;
+         }
+
+         TIssueScopeGuard issueScope(lambdaCtx.IssueManager, [&]() {
+             return new TIssue(lambdaCtx.GetPosition(candidate->Pos()), TStringBuilder() << "At function: " << candidate->Content());
+         });
+
+         if (!EnsureMinArgsCount(*candidate, 1, lambdaCtx)) {
+             return nullptr;
+         }
+
+         if (!candidate->Child(0)->IsCallable(NNodes::TCoRead::CallableName())) {
+             lambdaCtx.AddError(TIssue(lambdaCtx.GetPosition(candidate->Child(0)->Pos()), TStringBuilder() << "Expected Read!"));
+             return nullptr;
+         }
+
+         return BuildInputFromRead(candidate->Pos(), candidate->ChildPtr(0), lambdaCtx);
+     };
+
+     TOptimizeExprSettings settings(nullptr);
+
+     auto status = OptimizeExpr(input, output, replaceRight, ctx, settings);
+
+     if (status.Level == TStatus::Ok) {
+         Complete_ = true;
+     }
+     return status;
+ }
+
+ // Clears the completion flag so that the next DoTransform call processes the expression again.
+ void Rewind() override {
+ Complete_ = false;
+ }
+
+ private:
+ // Builds the replacement for a table read from the given `Read!` node. The read source is the
+ // third child of the node: a `Key` callable is handled by BuildInputFromKey, a `DataTables`
+ // callable by BuildInputFromDataTables. Any other source is reported as an error.
+ // Returns nullptr on failure.
+ TExprNode::TPtr BuildInputFromRead(TPositionHandle replacePos, const TExprNode::TPtr& node, TExprContext& ctx) {
+     TIssueScopeGuard issueScope(ctx.IssueManager, [&]() {
+         return MakeIntrusive<TIssue>(ctx.GetPosition(node->Pos()), TStringBuilder() << "At function: " << node->Content());
+     });
+
+     if (!EnsureMinArgsCount(*node, 3, ctx)) {
+         return nullptr;
+     }
+
+     const auto readSource = node->ChildPtr(2);
+     const bool fromKey = readSource->IsCallable(NNodes::TCoKey::CallableName());
+
+     if (!fromKey && !readSource->IsCallable("DataTables")) {
+         ctx.AddError(TIssue(ctx.GetPosition(readSource->Pos()), TStringBuilder() << "Unsupported read source: " << readSource->Content()));
+         return nullptr;
+     }
+
+     return fromKey
+         ? BuildInputFromKey(replacePos, readSource, ctx)
+         : BuildInputFromDataTables(replacePos, readSource, ctx);
+ }
+
+ // Builds the replacement for a read addressed by a `Key` node: a `CallableName_(<index>)` call,
+ // where the index is parsed from the table name by TryFetchInputIndexFromKey.
+ // When UseSystemColumns_ is set, the call is additionally wrapped into a Map that adds the
+ // PurecalcSysColumnTablePath member (holding the table name node) to every row.
+ // Returns nullptr when the key cannot be parsed.
+ // NOTE(review): the parsed index is not compared with InputsNumber_ in this method.
+ TExprNode::TPtr BuildInputFromKey(TPositionHandle replacePos, const TExprNode::TPtr& node, TExprContext& ctx) {
+ TIssueScopeGuard issueScope(ctx.IssueManager, [&]() {
+ return MakeIntrusive<TIssue>(ctx.GetPosition(node->Pos()), TStringBuilder() << "At function: " << node->Content());
+ });
+
+ ui32 inputIndex;
+ TExprNode::TPtr inputTableName;
+
+ if (!TryFetchInputIndexFromKey(node, ctx, inputIndex, inputTableName)) {
+ return nullptr;
+ }
+
+ YQL_ENSURE(inputTableName->IsCallable(NNodes::TCoString::CallableName()));
+
+ // The input callable with the input index as its only (atom) argument.
+ auto inputNode = ctx.Builder(replacePos)
+ .Callable(CallableName_)
+ .Atom(0, ToString(inputIndex))
+ .Seal()
+ .Build();
+
+ if (UseSystemColumns_) {
+ // row -> AddMember(row, <table path column>, <table name>)
+ auto mapLambda = ctx.Builder(replacePos)
+ .Lambda()
+ .Param("row")
+ .Callable(0, NNodes::TCoAddMember::CallableName())
+ .Arg(0, "row")
+ .Atom(1, PurecalcSysColumnTablePath)
+ .Add(2, inputTableName)
+ .Seal()
+ .Seal()
+ .Build();
+
+ return ctx.Builder(replacePos)
+ .Callable(NNodes::TCoMap::CallableName())
+ .Add(0, std::move(inputNode))
+ .Add(1, std::move(mapLambda))
+ .Seal()
+ .Build();
+ }
+
+ return inputNode;
+ }
+
+ // Builds the replacement for a read from `DataTables` (which must have no arguments):
+ //  - with more than one input, a list of `CallableName_(<i>)` calls, one per input;
+ //  - with exactly one input, a single `CallableName_("0")` call.
+ // Reports an error and returns nullptr when there are no inputs or the node has arguments.
+ TExprNode::TPtr BuildInputFromDataTables(TPositionHandle replacePos, const TExprNode::TPtr& node, TExprContext& ctx) {
+ TIssueScopeGuard issueScope(ctx.IssueManager, [&]() {
+ return MakeIntrusive<TIssue>(ctx.GetPosition(node->Pos()), TStringBuilder() << "At function: " << node->Content());
+ });
+
+ if (!InputsNumber_) {
+ ctx.AddError(TIssue(ctx.GetPosition(node->Pos()), "No inputs provided by input spec"));
+ return nullptr;
+ }
+
+ if (!EnsureArgsCount(*node, 0, ctx)) {
+ return nullptr;
+ }
+
+ auto builder = ctx.Builder(replacePos);
+
+ if (InputsNumber_ > 1) {
+ auto listBuilder = builder.List();
+
+ for (ui32 i = 0; i < InputsNumber_; ++i) {
+ listBuilder.Callable(i, CallableName_).Atom(0, ToString(i)).Seal();
+ }
+
+ return listBuilder.Seal().Build();
+ }
+
+ return builder.Callable(CallableName_).Atom(0, "0").Seal().Build();
+ }
+
+ // Parses a `Key` node of the form `Key('('table (String '<name>)))`.
+ // On success stores the `String` callable into resultTableName, stores the input index encoded
+ // in the table name into resultIndex, and returns true. The name must start with TablePrefix_;
+ // the rest must be either empty (index 0) or a number parseable as ui32.
+ // On failure adds an issue to ctx and returns false.
+ bool TryFetchInputIndexFromKey(const TExprNode::TPtr& node, TExprContext& ctx, ui32& resultIndex, TExprNode::TPtr& resultTableName) {
+ if (!EnsureArgsCount(*node, 1, ctx)) {
+ return false;
+ }
+
+ // The only argument must be a two-element list: the atom `table` and a String callable.
+ const auto* keyArg = node->Child(0);
+ if (!keyArg->IsList() || keyArg->ChildrenSize() != 2 || !keyArg->Child(0)->IsAtom("table") ||
+ !keyArg->Child(1)->IsCallable(NNodes::TCoString::CallableName()))
+ {
+ ctx.AddError(TIssue(ctx.GetPosition(keyArg->Pos()), "Expected single table name"));
+ return false;
+ }
+
+ resultTableName = keyArg->ChildPtr(1);
+
+ // NOTE(review): the String callable is assumed to have at least one child; its children
+ // count is not checked here - confirm.
+ auto tableName = resultTableName->Child(0)->Content();
+
+ if (!tableName.StartsWith(TablePrefix_)) {
+ ctx.AddError(TIssue(ctx.GetPosition(resultTableName->Child(0)->Pos()),
+ TStringBuilder() << "Invalid table name " << TString{tableName}.Quote() << ": prefix must be " << TablePrefix_.Quote()));
+ return false;
+ }
+
+ tableName.SkipPrefix(TablePrefix_);
+
+ // NOTE(review): tableName was just passed to SkipPrefix, so the error message below
+ // presumably quotes only the suffix rather than the full table name - confirm this is intended.
+ if (!tableName) {
+ resultIndex = 0;
+ } else if (!TryFromString(tableName, resultIndex)) {
+ ctx.AddError(TIssue(ctx.GetPosition(resultTableName->Child(0)->Pos()),
+ TStringBuilder() << "Invalid table name " << TString{tableName}.Quote() << ": suffix must be UI32 number"));
+ return false;
+ }
+
+ return true;
+ }
+ };
+}
+
+// Factory for the table reads replacer: allocates a TTableReadsReplacer, moving the two
+// string arguments into it.
+TAutoPtr<IGraphTransformer> NYql::NPureCalc::MakeTableReadsReplacer(
+ ui32 inputsNumber,
+ bool useSystemColumns,
+ TString tablePrefix,
+ TString callableName
+) {
+ return new TTableReadsReplacer(inputsNumber, useSystemColumns, std::move(tablePrefix), std::move(callableName));
+}
diff --git a/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.h b/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.h
new file mode 100644
index 0000000000..9c0196800d
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.h
@@ -0,0 +1,28 @@
+#pragma once
+
+#include <ydb/library/yql/public/purecalc/common/names.h>
+
+#include <ydb/library/yql/core/yql_graph_transformer.h>
+
+namespace NYql::NPureCalc {
+ /**
+ * SQL translation would generate a standard Read! call to read each input table. It will then generate
+ * a Right! call to get the table data from a tuple returned by Read!. This transformation replaces any Right!
+ * call with a call to special function used to get input data.
+ *
+ * Each table name must start with the specified prefix and end with an index of program input (e.g. `Input0`).
+ * Name without numeric suffix is an alias for the first input.
+ *
+ * @param inputsNumber number of program inputs.
+ * @param useSystemColumns whether to allow special system columns in input structs.
+ * @param tablePrefix required prefix for all table names (e.g. `Input`).
+ * @param callableName name of the special callable used to get input data (e.g. `Self`).
+ * @return a graph transformer for replacing table reads.
+ */
+ TAutoPtr<IGraphTransformer> MakeTableReadsReplacer(
+ ui32 inputsNumber,
+ bool useSystemColumns,
+ TString tablePrefix = TString{PurecalcInputTablePrefix},
+ TString callableName = TString{PurecalcInputCallableName}
+ );
+}
diff --git a/ydb/library/yql/public/purecalc/common/transformations/type_annotation.cpp b/ydb/library/yql/public/purecalc/common/transformations/type_annotation.cpp
new file mode 100644
index 0000000000..9ff39d19e9
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/transformations/type_annotation.cpp
@@ -0,0 +1,228 @@
+#include "type_annotation.h"
+
+#include <ydb/library/yql/public/purecalc/common/interface.h>
+#include <ydb/library/yql/public/purecalc/common/inspect_input.h>
+#include <ydb/library/yql/public/purecalc/common/names.h>
+
+#include <ydb/library/yql/core/type_ann/type_ann_core.h>
+#include <ydb/library/yql/core/yql_expr_type_annotation.h>
+
+#include <util/generic/fwd.h>
+
+using namespace NYql;
+using namespace NYql::NPureCalc;
+
+namespace {
+ // Type annotation transformer with per-callable overrides.
+ // Callables whose name has a registered handler are processed by that handler; every other
+ // node is delegated to the transformer created by CreateExtCallableTypeAnnotationTransformer.
+ class TTypeAnnotatorBase: public TSyncTransformerBase {
+ public:
+ // Handler signature: (input node, output node, expression context) -> status.
+ using THandler = std::function<TStatus(const TExprNode::TPtr&, TExprNode::TPtr&, TExprContext&)>;
+
+ TTypeAnnotatorBase(TTypeAnnotationContextPtr typeAnnotationContext)
+ {
+ // Take ownership of the released transformer and keep it in a shared_ptr.
+ OriginalTransformer_.reset(CreateExtCallableTypeAnnotationTransformer(*typeAnnotationContext).Release());
+ }
+
+ TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final {
+ // A registered handler takes precedence over the original transformer.
+ if (input->Type() == TExprNode::Callable) {
+ if (auto handler = Handlers_.FindPtr(input->Content())) {
+ return (*handler)(input, output, ctx);
+ }
+ }
+
+ auto status = OriginalTransformer_->Transform(input, output, ctx);
+
+ YQL_ENSURE(status.Level != IGraphTransformer::TStatus::Async, "Async type check is not supported");
+
+ return status;
+ }
+
+ void Rewind() final {
+ OriginalTransformer_->Rewind();
+ }
+
+ protected:
+ // Registers the same handler under every given name; a duplicate name fails YQL_ENSURE.
+ // The names are stored as TStringBuf keys, i.e. the map does not own the characters, so the
+ // underlying strings must stay alive for as long as the handlers are used.
+ void AddHandler(std::initializer_list<TStringBuf> names, THandler handler) {
+ for (auto name: names) {
+ YQL_ENSURE(Handlers_.emplace(name, handler).second, "Duplicate handler for " << name);
+ }
+ }
+
+ // Adapts a member function of the derived class taking (input, output, ctx) to THandler.
+ template <class TDerived>
+ THandler Hndl(TStatus(TDerived::* handler)(const TExprNode::TPtr&, TExprNode::TPtr&, TExprContext&)) {
+ return [this, handler] (TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) {
+ return (static_cast<TDerived*>(this)->*handler)(input, output, ctx);
+ };
+ }
+
+ // Adapts a member function of the derived class taking (input, ctx) to THandler;
+ // the output argument is ignored.
+ template <class TDerived>
+ THandler Hndl(TStatus(TDerived::* handler)(const TExprNode::TPtr&, TExprContext&)) {
+ return [this, handler] (TExprNode::TPtr input, TExprNode::TPtr& /*output*/, TExprContext& ctx) {
+ return (static_cast<TDerived*>(this)->*handler)(input, ctx);
+ };
+ }
+
+ private:
+ std::shared_ptr<IGraphTransformer> OriginalTransformer_;
+ THashMap<TStringBuf, THandler> Handlers_;
+ };
+
+ class TTypeAnnotator : public TTypeAnnotatorBase {
+ private:
+ TTypeAnnotationContextPtr TypeAnnotationContext_;
+ const TVector<const TStructExprType*>& InputStructs_;
+ EProcessorMode ProcessorMode_;
+ TString InputNodeName_;
+
+ public:
+ // Stores the input struct types (by reference), the processor mode and the name of the input
+ // callable, then registers handlers for the input callable, TableName, TablePath and
+ // HoppingTraits.
+ // NOTE(review): the TypeAnnotationContext_ member is not initialized in this constructor;
+ // the context is only passed on to the base class.
+ TTypeAnnotator(
+ TTypeAnnotationContextPtr typeAnnotationContext,
+ const TVector<const TStructExprType*>& inputStructs,
+ EProcessorMode processorMode,
+ TString nodeName
+ )
+ : TTypeAnnotatorBase(typeAnnotationContext)
+ , InputStructs_(inputStructs)
+ , ProcessorMode_(processorMode)
+ , InputNodeName_(std::move(nodeName))
+ {
+ // The handler key refers to the InputNodeName_ member, which is initialized above.
+ AddHandler({InputNodeName_}, Hndl(&TTypeAnnotator::HandleInputNode));
+ AddHandler({NNodes::TCoTableName::CallableName()}, Hndl(&TTypeAnnotator::HandleTableName));
+ AddHandler({NNodes::TCoTablePath::CallableName()}, Hndl(&TTypeAnnotator::HandleTablePath));
+ AddHandler({NNodes::TCoHoppingTraits::CallableName()}, Hndl(&TTypeAnnotator::HandleHoppingTraits));
+ }
+
+ // Deleted overload: InputStructs_ is a reference member, so constructing the annotator from
+ // a temporary vector is rejected at compile time.
+ TTypeAnnotator(TTypeAnnotationContextPtr, TVector<const TStructExprType*>&&, EProcessorMode, TString) = delete;
+
+ private:
+ // Annotates the input callable with the type of the addressed program input: a list of the
+ // input struct in PullList mode, a stream of the input struct in every other mode.
+ // Returns Error when the input index cannot be fetched from the node.
+ TStatus HandleInputNode(const TExprNode::TPtr& input, TExprContext& ctx) {
+     ui32 index;
+     if (!TryFetchInputIndexFromSelf(*input, ctx, InputStructs_.size(), index)) {
+         return IGraphTransformer::TStatus::Error;
+     }
+
+     YQL_ENSURE(index < InputStructs_.size());
+
+     const auto* rowType = InputStructs_[index];
+
+     if (ProcessorMode_ == EProcessorMode::PullList) {
+         input->SetTypeAnn(ctx.MakeType<TListExprType>(rowType));
+     } else {
+         input->SetTypeAnn(ctx.MakeType<TStreamExprType>(rowType));
+     }
+
+     return TStatus::Ok;
+ }
+
+ // Replaces a TableName callable (one or two arguments) with a simpler node and returns Repeat.
+ //  - An optional second argument must be an atom equal to PurecalcDefaultService.
+ //  - If the first argument is DependsOn(row), the replacement is built by TryBuildTableNameNode.
+ //  - Otherwise the first argument must be of String data type and becomes the replacement itself.
+ // Returns Error when any of the checks fails.
+ TStatus HandleTableName(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) {
+ if (!EnsureMinMaxArgsCount(*input, 1, 2, ctx)) {
+ return TStatus::Error;
+ }
+
+ if (input->ChildrenSize() > 1) {
+ if (!EnsureAtom(input->Tail(), ctx)) {
+ return TStatus::Error;
+ }
+
+ if (input->Tail().Content() != PurecalcDefaultService) {
+ ctx.AddError(
+ TIssue(
+ ctx.GetPosition(input->Tail().Pos()),
+ TStringBuilder() << "Unsupported system: " << input->Tail().Content()));
+ return TStatus::Error;
+ }
+ }
+
+ if (input->Head().IsCallable(NNodes::TCoDependsOn::CallableName())) {
+ if (!EnsureArgsCount(input->Head(), 1, ctx)) {
+ return TStatus::Error;
+ }
+
+ if (!TryBuildTableNameNode(input->Pos(), input->Head().HeadPtr(), output, ctx)) {
+ return TStatus::Error;
+ }
+ } else {
+ if (!EnsureSpecificDataType(input->Head(), EDataSlot::String, ctx)) {
+ return TStatus::Error;
+ }
+ output = input->HeadPtr();
+ }
+
+ return TStatus::Repeat;
+ }
+
+ // Replaces TablePath(DependsOn(row)) with the node built by TryBuildTableNameNode for that row
+ // and returns Repeat. The callable must have exactly one argument, and that argument must be
+ // a DependsOn with exactly one argument; otherwise Error is returned.
+ TStatus HandleTablePath(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) {
+ if (!EnsureArgsCount(*input, 1, ctx)) {
+ return TStatus::Error;
+ }
+
+ if (!EnsureDependsOn(input->Head(), ctx)) {
+ return TStatus::Error;
+ }
+
+ if (!EnsureArgsCount(input->Head(), 1, ctx)) {
+ return TStatus::Error;
+ }
+
+ if (!TryBuildTableNameNode(input->Pos(), input->Head().HeadPtr(), output, ctx)) {
+ return TStatus::Error;
+ }
+
+ return TStatus::Repeat;
+ }
+
+ // When the HoppingTraits callable has exactly one child, appends the atom `false` as a second
+ // child (modifying the node in place) and returns Repeat. With any other number of children
+ // the node is left untouched and Ok is returned. The output argument is not used.
+ // NOTE(review): no type annotation is set on the node in this handler; presumably the
+ // callable is handled elsewhere later - confirm.
+ TStatus HandleHoppingTraits(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) {
+ Y_UNUSED(output);
+ if (input->ChildrenSize() == 1) {
+ auto children = input->ChildrenList();
+ auto falseArg = ctx.Builder(input->Pos())
+ .Atom("false")
+ .Seal()
+ .Build();
+ children.emplace_back(falseArg);
+ input->ChangeChildrenInplace(std::move(children));
+ return TStatus::Repeat;
+ }
+
+ return TStatus::Ok;
+ }
+
+ private:
+ // Builds the node that yields the table name for the given row and stores it into result.
+ //  - If the row struct has the PurecalcSysColumnTablePath item (which must be of String data
+ //    type), the result is Member(row, <that column>).
+ //  - Otherwise the result is an empty String literal.
+ // Returns false when the row is not a struct or the column has a wrong type.
+ bool TryBuildTableNameNode(
+ TPositionHandle position, const TExprNode::TPtr& row, TExprNode::TPtr& result, TExprContext& ctx)
+ {
+ if (!EnsureStructType(*row, ctx)) {
+ return false;
+ }
+
+ const auto* structType = row->GetTypeAnn()->Cast<TStructExprType>();
+
+ if (auto pos = structType->FindItem(PurecalcSysColumnTablePath)) {
+ if (!EnsureSpecificDataType(row->Pos(), *structType->GetItems()[*pos]->GetItemType(), EDataSlot::String, ctx)) {
+ return false;
+ }
+
+ result = ctx.Builder(position)
+ .Callable(NNodes::TCoMember::CallableName())
+ .Add(0, row)
+ .Atom(1, PurecalcSysColumnTablePath)
+ .Seal()
+ .Build();
+ } else {
+ result = ctx.Builder(position)
+ .Callable(NNodes::TCoString::CallableName())
+ .Atom(0, "")
+ .Seal()
+ .Build();
+ }
+
+ return true;
+ }
+ };
+}
+
+// Factory for the purecalc type annotator: allocates a TTypeAnnotator with the given arguments.
+// The annotator keeps a reference to inputStructs, so the vector must outlive the returned
+// transformer.
+TAutoPtr<IGraphTransformer> NYql::NPureCalc::MakeTypeAnnotationTransformer(
+ TTypeAnnotationContextPtr typeAnnotationContext,
+ const TVector<const TStructExprType*>& inputStructs,
+ EProcessorMode processorMode,
+ const TString& nodeName
+) {
+ return new TTypeAnnotator(typeAnnotationContext, inputStructs, processorMode, nodeName);
+}
diff --git a/ydb/library/yql/public/purecalc/common/transformations/type_annotation.h b/ydb/library/yql/public/purecalc/common/transformations/type_annotation.h
new file mode 100644
index 0000000000..05a3674ff8
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/transformations/type_annotation.h
@@ -0,0 +1,28 @@
+#pragma once
+
+#include <ydb/library/yql/public/purecalc/common/names.h>
+#include <ydb/library/yql/public/purecalc/common/processor_mode.h>
+
+#include <ydb/library/yql/core/yql_graph_transformer.h>
+#include <ydb/library/yql/core/yql_type_annotation.h>
+
+namespace NYql {
+ namespace NPureCalc {
+ /**
+ * Build type annotation transformer that is aware of type of the input rows.
+ *
+ * @param typeAnnotationContext current context.
+ * @param inputStructs types of each input.
+ * @param processorMode current processor mode. This will affect generated input type,
+ * e.g. a list (pull list mode) or a stream (other modes) of input structs.
+ * @param nodeName name of the callable used to get input data, e.g. `Self`.
+ * @return a graph transformer for type annotation.
+ */
+ TAutoPtr<IGraphTransformer> MakeTypeAnnotationTransformer(
+ TTypeAnnotationContextPtr typeAnnotationContext,
+ const TVector<const TStructExprType*>& inputStructs,
+ EProcessorMode processorMode,
+ const TString& nodeName = TString{PurecalcInputCallableName}
+ );
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/common/type_from_schema.cpp b/ydb/library/yql/public/purecalc/common/type_from_schema.cpp
new file mode 100644
index 0000000000..7579481335
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/type_from_schema.cpp
@@ -0,0 +1,255 @@
+#include "type_from_schema.h"
+
+#include <library/cpp/yson/node/node_io.h>
+
+#include <ydb/library/yql/core/yql_expr_type_annotation.h>
+#include <ydb/library/yql/providers/common/schema/expr/yql_expr_schema.h>
+
+namespace {
+ using namespace NYql;
+
+#define REPORT(...) ctx.AddError(TIssue(TString(TStringBuilder() << __VA_ARGS__)))
+
+ // Compares a struct type with the expected one and reports every mismatch through REPORT.
+ // With expected == nullptr nothing is checked and true is returned. Otherwise:
+ //  - every item of `got` must be present in `expected`, and TrySilentConvertTo from the got
+ //    item type to the expected item type must not end with an error;
+ //  - every non-optional item of `expected` must be present in `got`.
+ // Returns false if at least one mismatch was reported.
+ bool CheckStruct(const TStructExprType* got, const TStructExprType* expected, TExprContext& ctx) {
+ auto status = true;
+
+ if (expected) {
+ for (const auto* gotNamedItem : got->GetItems()) {
+ auto expectedIndex = expected->FindItem(gotNamedItem->GetName());
+ if (expectedIndex) {
+ const auto* gotItem = gotNamedItem->GetItemType();
+ const auto* expectedItem = expected->GetItems()[*expectedIndex]->GetItemType();
+
+ // A throwaway argument node: only the conversion status is of interest here.
+ auto arg = ctx.NewArgument(TPositionHandle(), "arg");
+ auto fieldConversionStatus = TrySilentConvertTo(arg, *gotItem, *expectedItem, ctx);
+ if (fieldConversionStatus.Level == IGraphTransformer::TStatus::Error) {
+ REPORT("Item " << TString{gotNamedItem->GetName()}.Quote() << " expected to be " <<
+ *expectedItem << ", but got " << *gotItem);
+ status = false;
+ }
+ } else {
+ REPORT("Got unexpected item " << TString{gotNamedItem->GetName()}.Quote());
+ status = false;
+ }
+ }
+
+ for (const auto* expectedNamedItem : expected->GetItems()) {
+ // Optional items are allowed to be missing.
+ if (expectedNamedItem->GetItemType()->GetKind() == ETypeAnnotationKind::Optional) {
+ continue;
+ }
+ if (!got->FindItem(expectedNamedItem->GetName())) {
+ REPORT("Expected item " << TString{expectedNamedItem->GetName()}.Quote());
+ status = false;
+ }
+ }
+ }
+
+ return status;
+ }
+
+ // Checks the alternatives of a variant over struct against the expected ones (if any) and
+ // reports every mismatch through REPORT.
+ //  - With `expected` given, the sets of alternative names must coincide.
+ //  - Every alternative of `got` must be a struct; when an expected alternative with the same
+ //    name exists, the kinds must match and the structs are compared with CheckStruct.
+ // An alternative whose kind check failed is not cast to a struct and is not checked further.
+ // Returns false if at least one mismatch was reported.
+ bool CheckVariantContent(const TStructExprType* got, const TStructExprType* expected, TExprContext& ctx) {
+     auto status = true;
+
+     if (expected) {
+         for (const auto* gotNamedItem : got->GetItems()) {
+             if (!expected->FindItem(gotNamedItem->GetName())) {
+                 REPORT("Got unexpected alternative " << TString{gotNamedItem->GetName()}.Quote());
+                 status = false;
+             }
+         }
+
+         for (const auto* expectedNamedItem : expected->GetItems()) {
+             if (!got->FindItem(expectedNamedItem->GetName())) {
+                 REPORT("Expected alternative " << TString{expectedNamedItem->GetName()}.Quote());
+                 status = false;
+             }
+         }
+     }
+
+     for (const auto* gotNamedItem : got->GetItems()) {
+         const auto* gotItem = gotNamedItem->GetItemType();
+         auto expectedIndex = expected ? expected->FindItem(gotNamedItem->GetName()) : Nothing();
+         const auto* expectedItem = expected && expectedIndex ? expected->GetItems()[*expectedIndex]->GetItemType() : nullptr;
+
+         // Issues reported below are nested under the name of the alternative.
+         TIssueScopeGuard issueScope(ctx.IssueManager, [&]() {
+             return new TIssue(TPosition(), TStringBuilder() << "Alternative " << TString{gotNamedItem->GetName()}.Quote());
+         });
+
+         bool kindsAreValid = true;
+
+         if (expectedItem && expectedItem->GetKind() != gotItem->GetKind()) {
+             REPORT("Expected to be " << expectedItem->GetKind() << ", but got " << gotItem->GetKind());
+             kindsAreValid = false;
+         }
+
+         if (gotItem->GetKind() != ETypeAnnotationKind::Struct) {
+             REPORT("Expected to be Struct, but got " << gotItem->GetKind());
+             kindsAreValid = false;
+         }
+
+         if (!kindsAreValid) {
+             // Casting a non-struct type to TStructExprType is invalid, so stop here for this
+             // alternative; the mismatch has already been reported.
+             status = false;
+             continue;
+         }
+
+         // At this point the got alternative is a struct, and so is the expected one if present.
+         const auto* gotStruct = gotItem->Cast<TStructExprType>();
+         const auto* expectedStruct = expectedItem ? expectedItem->Cast<TStructExprType>() : nullptr;
+
+         if (!CheckStruct(gotStruct, expectedStruct, ctx)) {
+             status = false;
+         }
+     }
+
+     return status;
+ }
+
+ // Checks the alternatives of a variant over tuple against the expected ones (if any) and
+ // reports every mismatch through REPORT.
+ //  - With `expected` given, the number of alternatives must coincide (otherwise false is
+ //    returned at once).
+ //  - Every alternative of `got` must be a struct; when `expected` is given, the kinds must
+ //    match and the structs are compared with CheckStruct.
+ // An alternative whose kind check failed is not cast to a struct and is not checked further.
+ // Returns false if at least one mismatch was reported.
+ bool CheckVariantContent(const TTupleExprType* got, const TTupleExprType* expected, TExprContext& ctx) {
+     if (expected && expected->GetSize() != got->GetSize()) {
+         REPORT("Expected to have " << expected->GetSize() << " alternatives, but got " << got->GetSize());
+         return false;
+     }
+
+     auto status = true;
+
+     for (size_t i = 0; i < got->GetSize(); i++) {
+         const auto* gotItem = got->GetItems()[i];
+         const auto* expectedItem = expected ? expected->GetItems()[i] : nullptr;
+
+         // Issues reported below are nested under the index of the alternative.
+         TIssueScopeGuard issueScope(ctx.IssueManager, [i]() {
+             return new TIssue(TPosition(), TStringBuilder() << "Alternative #" << i);
+         });
+
+         bool kindsAreValid = true;
+
+         if (expectedItem && expectedItem->GetKind() != gotItem->GetKind()) {
+             REPORT("Expected " << expectedItem->GetKind() << ", but got " << gotItem->GetKind());
+             kindsAreValid = false;
+         }
+
+         if (gotItem->GetKind() != ETypeAnnotationKind::Struct) {
+             REPORT("Expected Struct, but got " << gotItem->GetKind());
+             kindsAreValid = false;
+         }
+
+         if (!kindsAreValid) {
+             // Casting a non-struct type to TStructExprType is invalid, so stop here for this
+             // alternative; the mismatch has already been reported.
+             status = false;
+             continue;
+         }
+
+         // At this point the got alternative is a struct, and so is the expected one if present.
+         const auto* gotStruct = gotItem->Cast<TStructExprType>();
+         const auto* expectedStruct = expectedItem ? expectedItem->Cast<TStructExprType>() : nullptr;
+
+         if (!CheckStruct(gotStruct, expectedStruct, ctx)) {
+             status = false;
+         }
+     }
+
+     return status;
+ }
+
+ // Checks a variant type against the expected one (if any). The underlying types must be of
+ // the same kind; the alternatives are then checked by the CheckVariantContent overload for
+ // structs or tuples. Any other underlying kind is treated as unreachable.
+ bool CheckVariant(const TVariantExprType* got, const TVariantExprType* expected, TExprContext& ctx) {
+ if (expected && expected->GetUnderlyingType()->GetKind() != got->GetUnderlyingType()->GetKind()) {
+ REPORT("Expected Variant over " << expected->GetUnderlyingType()->GetKind() <<
+ ", but got Variant over " << got->GetUnderlyingType()->GetKind());
+ return false;
+ }
+
+ switch (got->GetUnderlyingType()->GetKind()) {
+ case ETypeAnnotationKind::Struct:
+ {
+ const auto* gotStruct = got->GetUnderlyingType()->Cast<TStructExprType>();
+ const auto* expectedStruct = expected ? expected->GetUnderlyingType()->Cast<TStructExprType>() : nullptr;
+ return CheckVariantContent(gotStruct, expectedStruct, ctx);
+ }
+ case ETypeAnnotationKind::Tuple:
+ {
+ const auto* gotTuple = got->GetUnderlyingType()->Cast<TTupleExprType>();
+ const auto* expectedTuple = expected ? expected->GetUnderlyingType()->Cast<TTupleExprType>() : nullptr;
+ return CheckVariantContent(gotTuple, expectedTuple, ctx);
+ }
+ default:
+ Y_UNREACHABLE();
+ }
+
+ return false;
+ }
+
+ // Top-level schema check. `got` must be a struct, or (only when allowVariant is set) a
+ // variant; when `expected` is given, its kind must match and the contents are compared.
+ // The type itself is validated with Validate() before its contents are checked.
+ // Every problem is reported through REPORT; returns false if the schema is not acceptable.
+ bool CheckSchema(const TTypeAnnotationNode* got, const TTypeAnnotationNode* expected, TExprContext& ctx, bool allowVariant) {
+ if (expected && expected->GetKind() != got->GetKind()) {
+ REPORT("Expected " << expected->GetKind() << ", but got " << got->GetKind());
+ return false;
+ }
+
+ switch (got->GetKind()) {
+ case ETypeAnnotationKind::Struct:
+ {
+ TIssueScopeGuard issueScope(ctx.IssueManager, []() { return new TIssue(TPosition(), "Toplevel struct"); });
+
+ const auto* gotStruct = got->Cast<TStructExprType>();
+ const auto* expectedStruct = expected ? expected->Cast<TStructExprType>() : nullptr;
+
+ if (!gotStruct->Validate(TPositionHandle(), ctx)) {
+ return false;
+ }
+
+ return CheckStruct(gotStruct, expectedStruct, ctx);
+ }
+ case ETypeAnnotationKind::Variant:
+ if (allowVariant) {
+ TIssueScopeGuard issueScope(ctx.IssueManager, []() { return new TIssue(TPosition(), "Toplevel variant"); });
+
+ const auto* gotVariant = got->Cast<TVariantExprType>();
+ const auto* expectedVariant = expected ? expected->Cast<TVariantExprType>() : nullptr;
+
+ if (!gotVariant->Validate(TPositionHandle(), ctx)) {
+ return false;
+ }
+
+ return CheckVariant(gotVariant, expectedVariant, ctx);
+ }
+ // A variant that is not allowed is reported like any other unsupported kind.
+ [[fallthrough]];
+ default:
+ if (allowVariant) {
+ REPORT("Expected Struct or Variant, but got " << got->GetKind());
+ } else {
+ REPORT("Expected Struct, but got " << got->GetKind());
+ }
+ return false;
+ }
+ }
+}
+
+namespace NYql::NPureCalc {
+ // Parses a type from its yson description with NCommon::ParseTypeFromYson.
+ // Throws TCompileError (carrying the issues collected in ctx and the schema rendered as text
+ // yson) when the parser returns nullptr.
+ const TTypeAnnotationNode* MakeTypeFromSchema(const NYT::TNode& yson, TExprContext& ctx) {
+ const auto* type = NCommon::ParseTypeFromYson(yson, ctx);
+
+ if (!type) {
+ ythrow TCompileError("", ctx.IssueManager.GetIssues().ToString())
+ << "Incorrect schema: " << NYT::NodeToYsonString(yson, NYson::EYsonFormat::Text);
+ }
+
+ return type;
+ }
+
+ // Returns a struct type made of the items of `type` plus one item per entry of extraColumns
+ // (item name is the map key, item type is parsed from the yson value by MakeTypeFromSchema).
+ // With no extra columns the original type is returned as is.
+ // Throws TCompileError when the resulting struct does not pass Validate(); parsing errors
+ // are thrown by MakeTypeFromSchema.
+ const TStructExprType* ExtendStructType(
+ const TStructExprType* type, const THashMap<TString, NYT::TNode>& extraColumns, TExprContext& ctx)
+ {
+ if (extraColumns.empty()) {
+ return type;
+ }
+
+ // Work on a copy of the item list; the original type is left untouched.
+ auto items = type->GetItems();
+ for (const auto& pair : extraColumns) {
+ items.push_back(ctx.MakeType<TItemExprType>(pair.first, MakeTypeFromSchema(pair.second, ctx)));
+ }
+
+ auto result = ctx.MakeType<TStructExprType>(items);
+
+ if (!result->Validate(TPosition(), ctx)) {
+ ythrow TCompileError("", ctx.IssueManager.GetIssues().ToString()) << "Incorrect extended struct type";
+ }
+
+ return result;
+ }
+
+ // Checks an input schema: runs CheckSchema without an expected type and with variants
+ // disallowed. Issues are nested under the "Input schema" scope.
+ bool ValidateInputSchema(const TTypeAnnotationNode* type, TExprContext& ctx) {
+ TIssueScopeGuard issueScope(ctx.IssueManager, []() { return new TIssue(TPosition(), "Input schema"); });
+ return CheckSchema(type, nullptr, ctx, false);
+ }
+
+ // Checks an output schema: runs CheckSchema without an expected type and with variants
+ // allowed. Issues are nested under the "Output schema" scope.
+ bool ValidateOutputSchema(const TTypeAnnotationNode* type, TExprContext& ctx) {
+ TIssueScopeGuard issueScope(ctx.IssueManager, []() { return new TIssue(TPosition(), "Output schema"); });
+ return CheckSchema(type, nullptr, ctx, true);
+ }
+
+ // Checks the program return type against the expected type: runs CheckSchema with variants
+ // allowed. Issues are nested under the "Program return type" scope.
+ bool ValidateOutputType(const TTypeAnnotationNode* type, const TTypeAnnotationNode* expected, TExprContext& ctx) {
+ TIssueScopeGuard issueScope(ctx.IssueManager, []() { return new TIssue(TPosition(), "Program return type"); });
+ return CheckSchema(type, expected, ctx, true);
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/common/type_from_schema.h b/ydb/library/yql/public/purecalc/common/type_from_schema.h
new file mode 100644
index 0000000000..395777bc6d
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/type_from_schema.h
@@ -0,0 +1,36 @@
+#pragma once
+
+#include <ydb/library/yql/public/purecalc/common/interface.h>
+
+#include <ydb/library/yql/ast/yql_expr.h>
+
+#include <library/cpp/yson/node/node.h>
+
+namespace NYql {
+ namespace NPureCalc {
+ /**
+ * Load struct type from yson. Use methods below to check returned type for correctness.
+ */
+ const TTypeAnnotationNode* MakeTypeFromSchema(const NYT::TNode&, TExprContext&);
+
+ /**
+ * Extend struct type with additional columns. Type of each extra column is loaded from yson.
+ */
+ const TStructExprType* ExtendStructType(const TStructExprType*, const THashMap<TString, NYT::TNode>&, TExprContext&);
+
+ /**
+ * Check if the given type can be used as an input schema, i.e. it is a struct.
+ */
+ bool ValidateInputSchema(const TTypeAnnotationNode* type, TExprContext& ctx);
+
+ /**
+ * Check if the given type can be used as an output schema, i.e. it is a struct or a variant of structs.
+ */
+ bool ValidateOutputSchema(const TTypeAnnotationNode* type, TExprContext& ctx);
+
+ /**
+ * Check if output type can be silently converted to the expected type.
+ */
+ bool ValidateOutputType(const TTypeAnnotationNode* type, const TTypeAnnotationNode* expected, TExprContext& ctx);
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/common/worker.cpp b/ydb/library/yql/public/purecalc/common/worker.cpp
new file mode 100644
index 0000000000..b32560f420
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/worker.cpp
@@ -0,0 +1,566 @@
+#include "worker.h"
+#include "compile_mkql.h"
+
+#include <ydb/library/yql/ast/yql_expr.h>
+#include <ydb/library/yql/core/yql_user_data.h>
+#include <ydb/library/yql/core/yql_user_data_storage.h>
+#include <ydb/library/yql/providers/common/comp_nodes/yql_factory.h>
+#include <ydb/library/yql/public/purecalc/common/names.h>
+#include <ydb/library/yql/minikql/mkql_function_registry.h>
+#include <ydb/library/yql/minikql/mkql_node.h>
+#include <ydb/library/yql/minikql/mkql_node_builder.h>
+#include <ydb/library/yql/minikql/mkql_node_cast.h>
+#include <ydb/library/yql/minikql/mkql_node_visitor.h>
+#include <ydb/library/yql/minikql/mkql_node_serialization.h>
+#include <ydb/library/yql/minikql/mkql_program_builder.h>
+#include <ydb/library/yql/minikql/comp_nodes/mkql_factories.h>
+#include <ydb/library/yql/minikql/computation/mkql_computation_node.h>
+#include <ydb/library/yql/minikql/computation/mkql_computation_node_holders.h>
+#include <ydb/library/yql/minikql/computation/mkql_computation_node_impl.h>
+#include <ydb/library/yql/providers/common/mkql/yql_provider_mkql.h>
+#include <ydb/library/yql/providers/common/mkql/yql_type_mkql.h>
+
+#include <library/cpp/random_provider/random_provider.h>
+#include <library/cpp/time_provider/time_provider.h>
+
+#include <util/stream/file.h>
+#include <ydb/library/yql/minikql/computation/mkql_custom_list.h>
+
+using namespace NYql;
+using namespace NYql::NPureCalc;
+
+TWorkerGraph::TWorkerGraph(
+ const TExprNode::TPtr& exprRoot,
+ TExprContext& exprCtx,
+ const TString& serializedProgram,
+ const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry,
+ const TUserDataTable& userData,
+ const TVector<const TStructExprType*>& inputTypes,
+ const TVector<const TStructExprType*>& originalInputTypes,
+ const TTypeAnnotationNode* outputType,
+ const TString& LLVMSettings,
+ NKikimr::NUdf::ICountersProvider* countersProvider,
+ ui64 nativeYtTypeFlags,
+ TMaybe<ui64> deterministicTimeProviderSeed
+)
+ : ScopedAlloc_(__LOCATION__, NKikimr::TAlignedPagePoolCounters(), funcRegistry.SupportsSizedAllocators())
+ , Env_(ScopedAlloc_)
+ , FuncRegistry_(funcRegistry)
+ , RandomProvider_(CreateDefaultRandomProvider())
+ , TimeProvider_(deterministicTimeProviderSeed ?
+ CreateDeterministicTimeProvider(*deterministicTimeProviderSeed) :
+ CreateDefaultTimeProvider())
+ , LLVMSettings_(LLVMSettings)
+ , NativeYtTypeFlags_(nativeYtTypeFlags)
+{
+ // Build the root MKQL node
+ // (compiled from the expression tree when one is given, otherwise deserialized from serializedProgram)
+ NKikimr::NMiniKQL::TRuntimeNode rootNode;
+ if (exprRoot) {
+ rootNode = CompileMkql(exprRoot, exprCtx, FuncRegistry_, Env_, userData);
+ } else {
+ rootNode = NKikimr::NMiniKQL::DeserializeRuntimeNode(serializedProgram, Env_);
+ }
+
+ // Prepare container for input nodes
+
+ const ui32 inputsCount = inputTypes.size();
+
+ YQL_ENSURE(inputTypes.size() == originalInputTypes.size());
+
+ SelfNodes_.resize(inputsCount, nullptr);
+
+ YQL_ENSURE(SelfNodes_.size() == inputsCount);
+
+ // Setup struct types
+
+ NKikimr::NMiniKQL::TProgramBuilder pgmBuilder(Env_, FuncRegistry_);
+ for (ui32 i = 0; i < inputsCount; ++i) {
+ const auto* type = static_cast<NKikimr::NMiniKQL::TStructType*>(NCommon::BuildType(TPositionHandle(), *inputTypes[i], pgmBuilder));
+ const auto* originalType = type;
+ if (inputTypes[i] != originalInputTypes[i]) {
+ YQL_ENSURE(inputTypes[i]->GetSize() >= originalInputTypes[i]->GetSize());
+ originalType = static_cast<NKikimr::NMiniKQL::TStructType*>(NCommon::BuildType(TPositionHandle(), *originalInputTypes[i], pgmBuilder));
+ }
+
+ InputTypes_.push_back(type);
+ OriginalInputTypes_.push_back(originalType);
+ }
+
+ // Always assign OutputType_ (null when no output schema is given): the precompiled-program branch below tests it.
+ OutputType_ = outputType
+ ? NCommon::BuildType(TPositionHandle(), *outputType, pgmBuilder) : nullptr;
+ if (!exprRoot) {
+ auto outMkqlType = rootNode.GetStaticType();
+ if (outMkqlType->GetKind() == NKikimr::NMiniKQL::TType::EKind::List) {
+ outMkqlType = static_cast<NKikimr::NMiniKQL::TListType*>(outMkqlType)->GetItemType();
+ } else if (outMkqlType->GetKind() == NKikimr::NMiniKQL::TType::EKind::Stream) {
+ outMkqlType = static_cast<NKikimr::NMiniKQL::TStreamType*>(outMkqlType)->GetItemType();
+ } else {
+ ythrow TCompileError("", "") << "unexpected mkql output type " << NKikimr::NMiniKQL::TType::KindAsStr(outMkqlType->GetKind());
+ }
+ if (OutputType_) {
+ if (!OutputType_->IsSameType(*outMkqlType)) {
+ ythrow TCompileError("", "") << "precompiled program output type doesn't match the output schema";
+ }
+ } else {
+ OutputType_ = outMkqlType;
+ }
+ }
+
+ // Compile computation pattern
+ // The node factory below intercepts the purecalc input callable and substitutes an external node per input index.
+ auto selfCallableName = Env_.InternName(PurecalcInputCallableName);
+
+ NKikimr::NMiniKQL::TExploringNodeVisitor explorer;
+ explorer.Walk(rootNode.GetNode(), Env_);
+
+ auto compositeNodeFactory = NKikimr::NMiniKQL::GetCompositeWithBuiltinFactory(
+ {NKikimr::NMiniKQL::GetYqlFactory()}
+ );
+
+ auto nodeFactory = [&](
+ NKikimr::NMiniKQL::TCallable& callable, const NKikimr::NMiniKQL::TComputationNodeFactoryContext& ctx
+ ) -> NKikimr::NMiniKQL::IComputationNode* {
+ if (callable.GetType()->GetNameStr() == selfCallableName) {
+ YQL_ENSURE(callable.GetInputsCount() == 1, "Self takes exactly 1 argument");
+ const auto inputIndex = AS_VALUE(NKikimr::NMiniKQL::TDataLiteral, callable.GetInput(0))->AsValue().Get<ui32>();
+ YQL_ENSURE(inputIndex < inputsCount, "Self index is out of range");
+ YQL_ENSURE(!SelfNodes_[inputIndex], "Self can be called at most once with each index");
+ return SelfNodes_[inputIndex] = new NKikimr::NMiniKQL::TExternalComputationNode(ctx.Mutables);
+ }
+ else {
+ return compositeNodeFactory(callable, ctx);
+ }
+ };
+
+ NKikimr::NMiniKQL::TComputationPatternOpts computationPatternOpts(
+ ScopedAlloc_.Ref(),
+ Env_,
+ nodeFactory,
+ &funcRegistry,
+ NKikimr::NUdf::EValidateMode::None,
+ NKikimr::NUdf::EValidatePolicy::Exception,
+ LLVMSettings,
+ NKikimr::NMiniKQL::EGraphPerProcess::Multi,
+ nullptr,
+ countersProvider);
+
+ ComputationPattern_ = NKikimr::NMiniKQL::MakeComputationPattern(
+ explorer,
+ rootNode,
+ { rootNode.GetNode() },
+ computationPatternOpts);
+
+ ComputationGraph_ = ComputationPattern_->Clone(
+ computationPatternOpts.ToComputationOptions(*RandomProvider_, *TimeProvider_));
+
+ ComputationGraph_->Prepare();
+
+ // Scoped alloc acquires itself on construction. We need to release it before returning control to user.
+ // Note that scoped alloc releases itself on destruction so it is no problem if the above code throws.
+ ScopedAlloc_.Release();
+}
+
+TWorkerGraph::~TWorkerGraph() {
+ // The constructor ends by releasing the scoped alloc, so acquire it again before the members are destroyed.
+ ScopedAlloc_.Acquire();
+}
+
+template <typename TBase>
+TWorker<TBase>::TWorker(
+ TWorkerFactoryPtr factory,
+ const TExprNode::TPtr& exprRoot,
+ TExprContext& exprCtx,
+ const TString& serializedProgram,
+ const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry,
+ const TUserDataTable& userData,
+ const TVector<const TStructExprType*>& inputTypes,
+ const TVector<const TStructExprType*>& originalInputTypes,
+ const TTypeAnnotationNode* outputType,
+ const TString& LLVMSettings,
+ NKikimr::NUdf::ICountersProvider* countersProvider,
+ ui64 nativeYtTypeFlags,
+ TMaybe<ui64> deterministicTimeProviderSeed
+)
+ : WorkerFactory_(std::move(factory)) // weak_ptr (see TWorkerFactoryPtr): the worker does not keep its factory alive
+ , Graph_(exprRoot, exprCtx, serializedProgram, funcRegistry, userData, inputTypes, originalInputTypes, outputType, LLVMSettings,
+ countersProvider, nativeYtTypeFlags, deterministicTimeProviderSeed) // all graph construction happens in TWorkerGraph's constructor
+{
+}
+
+template <typename TBase>
+inline ui32 TWorker<TBase>::GetInputsCount() const {
+ return Graph_.InputTypes_.size(); // TWorkerGraph's constructor pushes exactly one entry per input
+}
+
+template <typename TBase>
+inline const NKikimr::NMiniKQL::TStructType* TWorker<TBase>::GetInputType(ui32 inputIndex, bool original) const {
+ // Both vectors are filled in lockstep by TWorkerGraph's constructor; `original` picks which one to consult.
+ const auto& types = original ? Graph_.OriginalInputTypes_ : Graph_.InputTypes_;
+ YQL_ENSURE(inputIndex < types.size(), "invalid input index (" << inputIndex << ") in GetInputType call");
+ // The index was validated above, so plain subscripting is safe.
+ return types[inputIndex];
+}
+
+template <typename TBase>
+inline const NKikimr::NMiniKQL::TStructType* TWorker<TBase>::GetInputType(bool original) const {
+ // The index-free overload is only valid when the program has exactly one input.
+ const auto& types = original ? Graph_.OriginalInputTypes_ : Graph_.InputTypes_;
+ YQL_ENSURE(types.size() == 1, "GetInputType() can be used only for single-input programs");
+
+ return types.front();
+}
+
+template <typename TBase>
+inline const NKikimr::NMiniKQL::TType* TWorker<TBase>::GetOutputType() const {
+ return Graph_.OutputType_; // built from the output schema, or taken from the precompiled program's item type
+}
+
+template <typename TBase>
+NYT::TNode TWorker<TBase>::MakeInputSchema(ui32 inputIndex) const {
+ const auto factory = WorkerFactory_.lock(); // pins the factory for the duration of the call
+ YQL_ENSURE(factory, "Access to destroyed worker factory");
+ return factory->MakeInputSchema(inputIndex);
+}
+
+template <typename TBase>
+NYT::TNode TWorker<TBase>::MakeInputSchema() const {
+ const auto factory = WorkerFactory_.lock(); // pins the factory for the duration of the call
+ YQL_ENSURE(factory, "Access to destroyed worker factory");
+ return factory->MakeInputSchema();
+}
+
+template <typename TBase>
+NYT::TNode TWorker<TBase>::MakeOutputSchema() const {
+ const auto factory = WorkerFactory_.lock(); // pins the factory for the duration of the call
+ YQL_ENSURE(factory, "Access to destroyed worker factory");
+ return factory->MakeOutputSchema();
+}
+
+template <typename TBase>
+NYT::TNode TWorker<TBase>::MakeOutputSchema(ui32) const {
+ const auto factory = WorkerFactory_.lock(); // pins the factory for the duration of the call
+ YQL_ENSURE(factory, "Access to destroyed worker factory");
+ return factory->MakeOutputSchema(); // NOTE(review): the ui32 argument is unnamed and ignored -- confirm this is intended
+}
+
+template <typename TBase>
+NYT::TNode TWorker<TBase>::MakeOutputSchema(TStringBuf) const {
+ const auto factory = WorkerFactory_.lock(); // pins the factory for the duration of the call
+ YQL_ENSURE(factory, "Access to destroyed worker factory");
+ return factory->MakeOutputSchema(); // NOTE(review): the TStringBuf argument is unnamed and ignored -- confirm this is intended
+}
+
+template <typename TBase>
+NYT::TNode TWorker<TBase>::MakeFullOutputSchema() const {
+ const auto factory = WorkerFactory_.lock(); // pins the factory for the duration of the call
+ YQL_ENSURE(factory, "Access to destroyed worker factory");
+ return factory->MakeFullOutputSchema();
+}
+
+template <typename TBase>
+inline NKikimr::NMiniKQL::TScopedAlloc& TWorker<TBase>::GetScopedAlloc() {
+ return Graph_.ScopedAlloc_; // left released by TWorkerGraph's constructor; code in this file locks it via Guard()/with_lock
+}
+
+template <typename TBase>
+inline NKikimr::NMiniKQL::IComputationGraph& TWorker<TBase>::GetGraph() {
+ return *Graph_.ComputationGraph_; // cloned from the pattern and Prepare()d in TWorkerGraph's constructor
+}
+
+template <typename TBase>
+inline const NKikimr::NMiniKQL::IFunctionRegistry&
+TWorker<TBase>::GetFunctionRegistry() const {
+ return Graph_.FuncRegistry_; // the reference passed to the constructor; the registry is not owned by the worker
+}
+
+template <typename TBase>
+inline NKikimr::NMiniKQL::TTypeEnvironment&
+TWorker<TBase>::GetTypeEnvironment() {
+ return Graph_.Env_; // the environment the program's MKQL nodes and types were built in
+}
+
+template <typename TBase>
+inline const TString& TWorker<TBase>::GetLLVMSettings() const {
+ return Graph_.LLVMSettings_; // copy of the settings string given at construction
+}
+
+template <typename TBase>
+inline ui64 TWorker<TBase>::GetNativeYtTypeFlags() const {
+ return Graph_.NativeYtTypeFlags_; // stored verbatim from the constructor argument
+}
+
+template <typename TBase>
+ITimeProvider* TWorker<TBase>::GetTimeProvider() const {
+ return Graph_.TimeProvider_.Get(); // non-owning; deterministic provider when a seed was supplied, the default one otherwise
+}
+
+template <typename TBase>
+void TWorker<TBase>::Release() {
+ if (auto p = WorkerFactory_.lock()) {
+ p->ReturnWorker(this); // factory is still alive: give the worker back to it
+ } else {
+ delete this; // factory is gone, so there is nothing to return the worker to
+ }
+}
+
+TPullStreamWorker::~TPullStreamWorker() {
+ auto guard = Guard(GetScopedAlloc()); // hold the scoped alloc while the stored value is dropped
+ Output_.Clear();
+}
+
+void TPullStreamWorker::SetInput(NKikimr::NUdf::TUnboxedValue&& value, ui32 inputIndex) {
+ const auto totalInputs = Graph_.SelfNodes_.size();
+ // Reject indexes the program has no slot for.
+ if (Y_UNLIKELY(inputIndex >= totalInputs)) {
+ ythrow yexception() << "invalid input index (" << inputIndex << ") in SetInput call";
+ }
+ // HasInput_ is sized lazily: it is empty for a fresh worker and after Release().
+ if (HasInput_.size() < totalInputs) {
+ HasInput_.resize(totalInputs, false);
+ }
+ // Every input may be supplied only once per run.
+ if (Y_UNLIKELY(HasInput_[inputIndex])) {
+ ythrow yexception() << "input value for #" << inputIndex << " input is already set";
+ }
+ // A null slot means the program never referenced this input; the value is then not stored anywhere.
+ if (auto* const node = Graph_.SelfNodes_[inputIndex]) {
+ YQL_ENSURE(value);
+ node->SetValue(Graph_.ComputationGraph_->GetContext(), std::move(value));
+ }
+
+ HasInput_[inputIndex] = true;
+
+ // Once the last missing input arrives, take the program's result value from the graph.
+ if (!CheckAllInputsSet()) {
+ return;
+ }
+ Output_ = Graph_.ComputationGraph_->GetValue();
+}
+
+NKikimr::NUdf::TUnboxedValue& TPullStreamWorker::GetOutput() {
+ if (Y_UNLIKELY(!CheckAllInputsSet())) {
+ ythrow yexception() << "some input values have not been set";
+ }
+ // NOTE(review): the check appears to pass while HasInput_ is still empty; Output_ is then the Invalid() placeholder -- confirm.
+ return Output_;
+}
+
+void TPullStreamWorker::Release() {
+ with_lock(GetScopedAlloc()) { // reset the stored values while holding the scoped alloc
+ Output_ = NKikimr::NUdf::TUnboxedValue::Invalid();
+ for (auto selfNode: Graph_.SelfNodes_) {
+ if (selfNode) {
+ selfNode->SetValue(Graph_.ComputationGraph_->GetContext(), NKikimr::NUdf::TUnboxedValue::Invalid());
+ }
+ }
+ }
+ HasInput_.clear(); // forget which inputs were set so that SetInput() accepts them again
+ TWorker<IPullStreamWorker>::Release(); // must be last: the base implementation may `delete this`
+}
+
+TPullListWorker::~TPullListWorker() {
+ auto guard = Guard(GetScopedAlloc()); // hold the scoped alloc while the stored values are dropped
+ Output_.Clear();
+ OutputIterator_.Clear();
+}
+
+void TPullListWorker::SetInput(NKikimr::NUdf::TUnboxedValue&& value, ui32 inputIndex) {
+ const auto totalInputs = Graph_.SelfNodes_.size();
+ // Reject indexes the program has no slot for.
+ if (Y_UNLIKELY(inputIndex >= totalInputs)) {
+ ythrow yexception() << "invalid input index (" << inputIndex << ") in SetInput call";
+ }
+ // HasInput_ is sized lazily: it is empty for a fresh worker and after Release().
+ if (HasInput_.size() < totalInputs) {
+ HasInput_.resize(totalInputs, false);
+ }
+ // Every input may be supplied only once per run.
+ if (Y_UNLIKELY(HasInput_[inputIndex])) {
+ ythrow yexception() << "input value for #" << inputIndex << " input is already set";
+ }
+ // A null slot means the program never referenced this input; the value is then not stored anywhere.
+ if (auto* const node = Graph_.SelfNodes_[inputIndex]) {
+ YQL_ENSURE(value);
+ node->SetValue(Graph_.ComputationGraph_->GetContext(), std::move(value));
+ }
+
+ HasInput_[inputIndex] = true;
+
+ // Once the last missing input arrives, take the result list and obtain an iterator over it.
+ if (!CheckAllInputsSet()) {
+ return;
+ }
+ Output_ = Graph_.ComputationGraph_->GetValue();
+ ResetOutputIterator();
+}
+
+NKikimr::NUdf::TUnboxedValue& TPullListWorker::GetOutput() {
+ if (Y_UNLIKELY(!CheckAllInputsSet())) {
+ ythrow yexception() << "some input values have not been set";
+ }
+ // NOTE(review): the check appears to pass while HasInput_ is still empty; Output_ is then the Invalid() placeholder -- confirm.
+ return Output_;
+}
+
+NKikimr::NUdf::TUnboxedValue& TPullListWorker::GetOutputIterator() {
+ if (Y_UNLIKELY(!CheckAllInputsSet())) {
+ ythrow yexception() << "some input values have not been set";
+ }
+ // Returned by reference to the stored iterator, which ResetOutputIterator() replaces.
+ return OutputIterator_;
+}
+
+void TPullListWorker::ResetOutputIterator() {
+ if (Y_UNLIKELY(!CheckAllInputsSet())) {
+ ythrow yexception() << "some input values have not been set";
+ }
+ // Replace any previous iterator with a new one obtained from the current output list.
+ OutputIterator_ = Output_.GetListIterator();
+}
+
+void TPullListWorker::Release() {
+ with_lock(GetScopedAlloc()) { // reset the stored values while holding the scoped alloc
+ Output_ = NKikimr::NUdf::TUnboxedValue::Invalid();
+ OutputIterator_ = NKikimr::NUdf::TUnboxedValue::Invalid();
+
+ for (auto selfNode: Graph_.SelfNodes_) {
+ if (selfNode) {
+ selfNode->SetValue(Graph_.ComputationGraph_->GetContext(), NKikimr::NUdf::TUnboxedValue::Invalid());
+ }
+ }
+ }
+ HasInput_.clear(); // forget which inputs were set so that SetInput() accepts them again
+ TWorker<IPullListWorker>::Release(); // must be last: the base implementation may `delete this`
+}
+
+namespace {
+ class TPushStream final: public NKikimr::NMiniKQL::TCustomListValue { // input value fed by TPushStreamWorker; buffers at most one pending item
+ private:
+ mutable bool HasIterator_ = false; // mutable because GetListIterator() is const
+ bool HasValue_ = false;
+ bool IsFinished_ = false;
+ NKikimr::NUdf::TUnboxedValue Value_ = NKikimr::NUdf::TUnboxedValue::Invalid();
+
+ public:
+ using TCustomListValue::TCustomListValue;
+
+ public:
+ void SetValue(NKikimr::NUdf::TUnboxedValue&& value) {
+ Value_ = std::move(value); // overwrites any value that has not been fetched yet
+ HasValue_ = true;
+ }
+
+ void SetFinished() {
+ IsFinished_ = true;
+ }
+
+ NKikimr::NUdf::TUnboxedValue GetListIterator() const override {
+ YQL_ENSURE(!HasIterator_, "only one pass over input is supported");
+ HasIterator_ = true;
+ return NKikimr::NUdf::TUnboxedValuePod(const_cast<TPushStream*>(this)); // the object serves as its own iterator
+ }
+
+ NKikimr::NUdf::EFetchStatus Fetch(NKikimr::NUdf::TUnboxedValue& result) override {
+ if (IsFinished_) { // checked first: once finished, a still-pending value is not delivered
+ return NKikimr::NUdf::EFetchStatus::Finish;
+ } else if (!HasValue_) {
+ return NKikimr::NUdf::EFetchStatus::Yield; // nothing pending yet
+ } else {
+ result = std::move(Value_);
+ HasValue_ = false;
+ return NKikimr::NUdf::EFetchStatus::Ok;
+ }
+ }
+ };
+}
+
+void TPushStreamWorker::FeedToConsumer() {
+ // Drain whatever the program can produce right now and forward each item to the consumer.
+ auto output = Graph_.ComputationGraph_->GetValue();
+ bool drained = false;
+ while (!drained) {
+ NKikimr::NUdf::TUnboxedValue item;
+ // Any status other than Ok ends this round.
+ if (output.Fetch(item) == NKikimr::NUdf::EFetchStatus::Ok) {
+ Consumer_->OnObject(&item);
+ } else {
+ drained = true;
+ }
+ }
+}
+
+void TPushStreamWorker::SetConsumer(THolder<IConsumer<const NKikimr::NUdf::TUnboxedValue*>> consumer) {
+ auto guard = Guard(GetScopedAlloc());
+ const auto inputsCount = Graph_.SelfNodes_.size();
+ // Push mode works with zero or one input, and a consumer can be installed only once until Release().
+ YQL_ENSURE(inputsCount < 2, "push stream mode doesn't support several inputs");
+ YQL_ENSURE(!Consumer_, "consumer is already set");
+
+ Consumer_ = std::move(consumer);
+
+ if (inputsCount == 1) {
+ SelfNode_ = Graph_.SelfNodes_[0];
+ }
+ // SelfNode_ stays null when there is no input or the program never references it.
+ if (SelfNode_) {
+ SelfNode_->SetValue(
+ Graph_.ComputationGraph_->GetContext(),
+ Graph_.ComputationGraph_->GetHolderFactory().Create<TPushStream>());
+ }
+ // Forward anything the program can already produce before the first Push().
+ FeedToConsumer();
+}
+
+void TPushStreamWorker::Push(NKikimr::NUdf::TUnboxedValue&& value) {
+ YQL_ENSURE(Consumer_, "consumer is not set");
+ YQL_ENSURE(!Finished_, "OnFinish has already been sent to the consumer; no new values can be pushed");
+ // SelfNode_ holds the TPushStream installed by SetConsumer(), hence the static_cast below.
+ if (Y_LIKELY(SelfNode_)) {
+ static_cast<TPushStream*>(SelfNode_->GetValue(Graph_.ComputationGraph_->GetContext()).AsBoxed().Get())->SetValue(std::move(value));
+ }
+ // NOTE(review): unlike SetConsumer(), no scoped-alloc guard is taken here -- presumably the caller holds it; confirm.
+ FeedToConsumer();
+}
+
+void TPushStreamWorker::OnFinish() {
+ YQL_ENSURE(Consumer_, "consumer is not set");
+ YQL_ENSURE(!Finished_, "already finished");
+ // Mark the input stream finished so that its Fetch() reports Finish from now on.
+ if (Y_LIKELY(SelfNode_)) {
+ static_cast<TPushStream*>(SelfNode_->GetValue(Graph_.ComputationGraph_->GetContext()).AsBoxed().Get())->SetFinished();
+ }
+
+ FeedToConsumer();
+ // The consumer is notified only after the final drain above.
+ Consumer_->OnFinish();
+
+ Finished_ = true;
+}
+
+void TPushStreamWorker::Release() {
+ with_lock(GetScopedAlloc()) { // destroy the consumer and the input value while holding the scoped alloc
+ Consumer_.Destroy();
+ if (SelfNode_) {
+ SelfNode_->SetValue(Graph_.ComputationGraph_->GetContext(), NKikimr::NUdf::TUnboxedValue::Invalid());
+ }
+ SelfNode_ = nullptr;
+ }
+ Finished_ = false; // back to the initial state so that SetConsumer() can be called again
+ TWorker<IPushStreamWorker>::Release(); // must be last: the base implementation may `delete this`
+}
+
+
+namespace NYql {
+ namespace NPureCalc {
+ template // explicit instantiations: TWorker's members are defined in this .cpp, so each specialization in use is instantiated here
+ class TWorker<IPullStreamWorker>;
+
+ template
+ class TWorker<IPullListWorker>;
+
+ template
+ class TWorker<IPushStreamWorker>;
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/common/worker.h b/ydb/library/yql/public/purecalc/common/worker.h
new file mode 100644
index 0000000000..4d1f0889db
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/worker.h
@@ -0,0 +1,168 @@
+#pragma once
+
+#include <ydb/library/yql/public/purecalc/common/interface.h>
+
+#include <ydb/library/yql/public/udf/udf_value.h>
+#include <ydb/library/yql/ast/yql_expr.h>
+#include <ydb/library/yql/core/yql_user_data.h>
+#include <ydb/library/yql/minikql/mkql_alloc.h>
+#include <ydb/library/yql/minikql/mkql_node.h>
+#include <ydb/library/yql/minikql/mkql_node_visitor.h>
+#include <ydb/library/yql/minikql/computation/mkql_computation_node.h>
+#include <ydb/library/yql/providers/common/mkql/yql_provider_mkql.h>
+
+#include <memory>
+
+namespace NYql {
+ namespace NPureCalc {
+ struct TWorkerGraph {
+ TWorkerGraph(
+ const TExprNode::TPtr& exprRoot,
+ TExprContext& exprCtx,
+ const TString& serializedProgram,
+ const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry,
+ const TUserDataTable& userData,
+ const TVector<const TStructExprType*>& inputTypes,
+ const TVector<const TStructExprType*>& originalInputTypes,
+ const TTypeAnnotationNode* outputType,
+ const TString& LLVMSettings,
+ NKikimr::NUdf::ICountersProvider* countersProvider,
+ ui64 nativeYtTypeFlags,
+ TMaybe<ui64> deterministicTimeProviderSeed
+ );
+
+ ~TWorkerGraph();
+ // Declaration order matters: ScopedAlloc_ comes first because Env_ is constructed from it.
+ NKikimr::NMiniKQL::TScopedAlloc ScopedAlloc_; // released at the end of the constructor, re-acquired in the destructor
+ NKikimr::NMiniKQL::TTypeEnvironment Env_;
+ const NKikimr::NMiniKQL::IFunctionRegistry& FuncRegistry_;
+ TIntrusivePtr<IRandomProvider> RandomProvider_;
+ TIntrusivePtr<ITimeProvider> TimeProvider_;
+ NKikimr::NMiniKQL::IComputationPattern::TPtr ComputationPattern_;
+ THolder<NKikimr::NMiniKQL::IComputationGraph> ComputationGraph_;
+ TString LLVMSettings_;
+ ui64 NativeYtTypeFlags_;
+ TMaybe<TString> TimestampColumn_;
+ const NKikimr::NMiniKQL::TType* OutputType_ = nullptr; // null until the constructor determines the output type; it is tested there, so it must never be indeterminate
+ TVector<NKikimr::NMiniKQL::IComputationExternalNode*> SelfNodes_; // one slot per input; null when the program never references that input
+ TVector<const NKikimr::NMiniKQL::TStructType*> InputTypes_;
+ TVector<const NKikimr::NMiniKQL::TStructType*> OriginalInputTypes_;
+ };
+
+ template <typename TBase>
+ class TWorker: public TBase {
+ public:
+ using TWorkerFactoryPtr = std::weak_ptr<IWorkerFactory>;
+ private:
+ // Worker factory implementation should stay alive for this worker to operate correctly.
+ TWorkerFactoryPtr WorkerFactory_;
+
+ protected:
+ TWorkerGraph Graph_; // protected so that the concrete workers can reach the computation graph and the self nodes
+
+ public:
+ TWorker(
+ TWorkerFactoryPtr factory,
+ const TExprNode::TPtr& exprRoot,
+ TExprContext& exprCtx,
+ const TString& serializedProgram,
+ const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry,
+ const TUserDataTable& userData,
+ const TVector<const TStructExprType*>& inputTypes,
+ const TVector<const TStructExprType*>& originalInputTypes,
+ const TTypeAnnotationNode* outputType,
+ const TString& LLVMSettings,
+ NKikimr::NUdf::ICountersProvider* countersProvider,
+ ui64 nativeYtTypeFlags,
+ TMaybe<ui64> deterministicTimeProviderSeed
+ );
+
+ public:
+ ui32 GetInputsCount() const override;
+ const NKikimr::NMiniKQL::TStructType* GetInputType(ui32, bool) const override;
+ const NKikimr::NMiniKQL::TStructType* GetInputType(bool) const override;
+ const NKikimr::NMiniKQL::TType* GetOutputType() const override;
+ NYT::TNode MakeInputSchema() const override; // the Make*Schema methods forward to the factory and fail if it has been destroyed
+ NYT::TNode MakeInputSchema(ui32) const override;
+ NYT::TNode MakeOutputSchema() const override;
+ NYT::TNode MakeOutputSchema(ui32) const override; // NOTE(review): the definition ignores the argument -- confirm this is intended
+ NYT::TNode MakeOutputSchema(TStringBuf) const override; // NOTE(review): the definition ignores the argument -- confirm this is intended
+ NYT::TNode MakeFullOutputSchema() const override;
+ NKikimr::NMiniKQL::TScopedAlloc& GetScopedAlloc() override;
+ NKikimr::NMiniKQL::IComputationGraph& GetGraph() override;
+ const NKikimr::NMiniKQL::IFunctionRegistry& GetFunctionRegistry() const override;
+ NKikimr::NMiniKQL::TTypeEnvironment& GetTypeEnvironment() override;
+ const TString& GetLLVMSettings() const override;
+ ui64 GetNativeYtTypeFlags() const override;
+ ITimeProvider* GetTimeProvider() const override;
+ protected:
+ void Release() override; // gives the worker back to its factory, or deletes it when the factory is gone
+ };
+
+ class TPullStreamWorker final: public TWorker<IPullStreamWorker> {
+ private:
+ NKikimr::NUdf::TUnboxedValue Output_ = NKikimr::NUdf::TUnboxedValue::Invalid();
+ TVector<bool> HasInput_; // sized lazily by SetInput(); cleared by Release()
+
+ inline bool CheckAllInputsSet() {
+ return AllOf(HasInput_, [](bool x) { return x; }); // NOTE(review): presumably also true while HasInput_ is empty -- confirm
+ }
+
+ public:
+ using TWorker::TWorker;
+ ~TPullStreamWorker();
+
+ public:
+ void SetInput(NKikimr::NUdf::TUnboxedValue&&, ui32) override;
+ NKikimr::NUdf::TUnboxedValue& GetOutput() override;
+
+ protected:
+ void Release() override; // resets the per-run state, then delegates to TWorker::Release()
+ };
+
+ class TPullListWorker final: public TWorker<IPullListWorker> {
+ private:
+ NKikimr::NUdf::TUnboxedValue Output_ = NKikimr::NUdf::TUnboxedValue::Invalid();
+ NKikimr::NUdf::TUnboxedValue OutputIterator_ = NKikimr::NUdf::TUnboxedValue::Invalid(); // iterator over Output_, see ResetOutputIterator()
+ TVector<bool> HasInput_; // sized lazily by SetInput(); cleared by Release()
+
+ inline bool CheckAllInputsSet() {
+ return AllOf(HasInput_, [](bool x) { return x; }); // NOTE(review): presumably also true while HasInput_ is empty -- confirm
+ }
+
+ public:
+ using TWorker::TWorker;
+ ~TPullListWorker();
+
+ public:
+ void SetInput(NKikimr::NUdf::TUnboxedValue&&, ui32) override;
+ NKikimr::NUdf::TUnboxedValue& GetOutput() override;
+ NKikimr::NUdf::TUnboxedValue& GetOutputIterator() override;
+ void ResetOutputIterator() override;
+
+ protected:
+ void Release() override; // resets the per-run state, then delegates to TWorker::Release()
+ };
+
+ class TPushStreamWorker final: public TWorker<IPushStreamWorker> {
+ private:
+ THolder<IConsumer<const NKikimr::NUdf::TUnboxedValue*>> Consumer_{}; // installed by SetConsumer(), destroyed in Release()
+ bool Finished_ = false; // set at the end of OnFinish(), reset in Release()
+ NKikimr::NMiniKQL::IComputationExternalNode* SelfNode_ = nullptr; // node of the single input; stays null when there is none
+
+ public:
+ using TWorker::TWorker;
+
+ private:
+ void FeedToConsumer(); // forwards currently available program output to Consumer_
+
+ public:
+ void SetConsumer(THolder<IConsumer<const NKikimr::NUdf::TUnboxedValue*>>) override;
+ void Push(NKikimr::NUdf::TUnboxedValue&&) override;
+ void OnFinish() override;
+
+ protected:
+ void Release() override; // resets the per-run state, then delegates to TWorker::Release()
+ };
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/common/worker_factory.cpp b/ydb/library/yql/public/purecalc/common/worker_factory.cpp
new file mode 100644
index 0000000000..223dee8c1b
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/worker_factory.cpp
@@ -0,0 +1,454 @@
+#include "worker_factory.h"
+
+#include "type_from_schema.h"
+#include "worker.h"
+#include "compile_mkql.h"
+
+#include <ydb/library/yql/sql/sql.h>
+#include <ydb/library/yql/ast/yql_expr.h>
+#include <ydb/library/yql/core/yql_expr_optimize.h>
+#include <ydb/library/yql/core/yql_type_helpers.h>
+#include <ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.h>
+#include <ydb/library/yql/providers/common/codec/yql_codec.h>
+#include <ydb/library/yql/providers/common/udf_resolve/yql_simple_udf_resolver.h>
+#include <ydb/library/yql/providers/common/schema/expr/yql_expr_schema.h>
+#include <ydb/library/yql/providers/common/provider/yql_provider.h>
+#include <ydb/library/yql/minikql/mkql_node.h>
+#include <ydb/library/yql/minikql/mkql_node_serialization.h>
+#include <ydb/library/yql/minikql/mkql_alloc.h>
+#include <ydb/library/yql/minikql/aligned_page_pool.h>
+#include <ydb/library/yql/core/services/yql_transform_pipeline.h>
+#include <ydb/library/yql/public/purecalc/common/names.h>
+#include <ydb/library/yql/public/purecalc/common/transformations/type_annotation.h>
+#include <ydb/library/yql/public/purecalc/common/transformations/align_output_schema.h>
+#include <ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.h>
+#include <ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.h>
+#include <ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.h>
+#include <ydb/library/yql/utils/log/log.h>
+#include <util/stream/trace.h>
+
+using namespace NYql;
+using namespace NYql::NPureCalc;
+
+// Builds a worker factory from the given options.
+//
+// The constructor
+//   1. turns every input schema into a struct type (extended with the input's virtual columns)
+//      and records all column names per input;
+//   2. builds the output type from the output schema, if one is given;
+//   3. either stores the query as an already serialized program (Mkql translation mode) or
+//      compiles it and, when no output schema was given, deduces the output type from the result.
+//
+// Throws TCompileError when a schema is invalid, when an input exposes a system column while
+// system columns are disabled, or when the output schema cannot be deduced.
+template <typename TBase>
+TWorkerFactory<TBase>::TWorkerFactory(TWorkerFactoryOptions options, EProcessorMode processorMode)
+    : Factory_(std::move(options.Factory))
+    , FuncRegistry_(std::move(options.FuncRegistry))
+    , UserData_(std::move(options.UserData))
+    , LLVMSettings_(std::move(options.LLVMSettings))
+    , CountersProvider_(options.CountersProvider_)
+    , NativeYtTypeFlags_(options.NativeYtTypeFlags_)
+    , DeterministicTimeProviderSeed_(options.DeterministicTimeProviderSeed_)
+    , UseSystemColumns_(options.UseSystemColumns)
+    , UseWorkerPool_(options.UseWorkerPool)
+{
+    // ---- Inputs: struct types and the full set of column names per input.
+    const auto& schemas = options.InputSpec.GetSchemas();
+    const auto& virtualColumns = options.InputSpec.GetAllVirtualColumns();
+
+    YQL_ENSURE(schemas.size() == virtualColumns.size());
+
+    const auto totalInputs = schemas.size();
+    for (ui32 inputNo = 0; inputNo < totalInputs; ++inputNo) {
+        const auto* declaredType = MakeTypeFromSchema(schemas[inputNo], ExprContext_);
+        if (!ValidateInputSchema(declaredType, ExprContext_)) {
+            ythrow TCompileError("", ExprContext_.IssueManager.GetIssues().ToString()) << "invalid schema for #" << inputNo << " input";
+        }
+
+        // The declared struct is kept as is; the extended one also carries the virtual columns.
+        const auto* declaredStruct = declaredType->template Cast<TStructExprType>();
+        const auto* extendedStruct = ExtendStructType(declaredStruct, virtualColumns[inputNo], ExprContext_);
+
+        InputTypes_.push_back(extendedStruct);
+        OriginalInputTypes_.push_back(declaredStruct);
+
+        auto& knownColumns = AllColumns_.emplace_back();
+        for (const auto* item : extendedStruct->GetItems()) {
+            knownColumns.insert(TString(item->GetName()));
+
+            if (UseSystemColumns_) {
+                continue;
+            }
+            if (item->GetName().StartsWith(PurecalcSysColumnsPrefix)) {
+                ythrow TCompileError("", ExprContext_.IssueManager.GetIssues().ToString())
+                    << "#" << inputNo << " input provides system column " << item->GetName()
+                    << ", but it is forbidden by options";
+            }
+        }
+    }
+
+    // ---- Output: take the type from the output spec when it has a schema.
+    if (auto declaredOutput = options.OutputSpec.GetSchema(); declaredOutput.IsNull()) {
+        OutputType_ = nullptr;
+    } else {
+        OutputType_ = MakeTypeFromSchema(declaredOutput, ExprContext_);
+        if (!ValidateOutputSchema(OutputType_, ExprContext_)) {
+            ythrow TCompileError("", ExprContext_.IssueManager.GetIssues().ToString()) << "invalid output schema";
+        }
+    }
+
+    // ---- Program: a Mkql query is already serialized and is stored verbatim.
+    if (options.TranslationMode_ == ETranslationMode::Mkql) {
+        SerializedProgram_ = TString{options.Query};
+        return;
+    }
+
+    ExprRoot_ = Compile(options.Query, ETranslationMode::SQL == options.TranslationMode_,
+                        options.ModuleResolver, options.SyntaxVersion_, options.Modules, options.OutputSpec, processorMode);
+
+    // Without an explicit output schema the output type comes from the compiled program.
+    if (!OutputType_) {
+        OutputType_ = GetSequenceItemType(ExprRoot_->Pos(), ExprRoot_->GetTypeAnn(), true, ExprContext_);
+        if (!OutputType_) {
+            ythrow TCompileError("", ExprContext_.IssueManager.GetIssues().ToString()) << "cannot deduce output schema";
+        }
+    }
+}
+
+template <typename TBase>
+TExprNode::TPtr TWorkerFactory<TBase>::Compile(
+ TStringBuf query, // program text: SQL when `sql` is true, otherwise a YQL s-expression
+ bool sql, // selects SqlToYql (true) or ParseAst (false) for parsing
+ IModuleResolver::TPtr moduleResolver, // attached to the type context and handed to CompileExpr
+ ui16 syntaxVersion, // SQL syntax version; 0 flags the query as deprecated SQL
+ const THashMap<TString, TString>& modules, // becomes the ModuleMapping of the SQL translation settings
+ const TOutputSpecBase& outputSpec, // source of the output columns filter
+ EProcessorMode processorMode // forwarded to the type annotation and output alignment stages
+) {
+ // Translates the program text into an optimized expression graph and returns its root; parse, compile and optimization failures are reported by throwing TCompileError.
+ // Prepare type annotation context
+ TTypeAnnotationContextPtr typeContext;
+
+ typeContext = MakeIntrusive<TTypeAnnotationContext>();
+ typeContext->RandomProvider = CreateDefaultRandomProvider();
+ typeContext->TimeProvider = DeterministicTimeProviderSeed_ ?
+ CreateDeterministicTimeProvider(*DeterministicTimeProviderSeed_) :
+ CreateDefaultTimeProvider();
+ typeContext->UdfResolver = NCommon::CreateSimpleUdfResolver(FuncRegistry_.Get());
+ typeContext->UserDataStorage = MakeIntrusive<TUserDataStorage>(nullptr, UserData_, nullptr, nullptr);
+ typeContext->Modules = moduleResolver;
+ typeContext->Initialize(ExprContext_);
+
+ if (auto modules = dynamic_cast<TModuleResolver*>(moduleResolver.get())) { // NOTE: this local shadows the `modules` parameter inside the if-block
+ modules->AttachUserData(typeContext->UserDataStorage);
+ }
+
+ // Parse SQL/s-expr into AST
+
+ TAstParseResult astRes;
+
+ if (sql) {
+ NSQLTranslation::TTranslationSettings settings;
+
+ typeContext->DeprecatedSQL = (syntaxVersion == 0);
+ settings.SyntaxVersion = syntaxVersion;
+ settings.V0Behavior = NSQLTranslation::EV0Behavior::Disable;
+ settings.Mode = NSQLTranslation::ESqlMode::LIMITED_VIEW;
+ settings.DefaultCluster = PurecalcDefaultCluster;
+ settings.ClusterMapping[settings.DefaultCluster] = PurecalcDefaultService;
+ settings.ModuleMapping = modules;
+ settings.EnableGenericUdfs = true;
+ settings.File = "generated.sql";
+ for (const auto& [key, block] : UserData_) {
+ TStringBuf alias(key.Alias());
+ if (block.Usage.Test(EUserDataBlockUsage::Library) && !alias.StartsWith("/lib")) { // only Library blocks whose alias does not start with "/lib" are registered
+ alias.SkipPrefix("/home/"); // a leading "/home/" is dropped from the alias when present
+ settings.Libraries.emplace(alias);
+ }
+ }
+
+ astRes = SqlToYql(TString(query), settings);
+ } else {
+ astRes = ParseAst(TString(query));
+ }
+
+ if (!astRes.IsOk()) {
+ ythrow TCompileError(TString(query), astRes.Issues.ToString()) << "failed to parse " << (sql ? ETranslationMode::SQL : ETranslationMode::SExpr);
+ }
+
+ ExprContext_.IssueManager.AddIssues(astRes.Issues); // issues reported by a successful parse are kept in the expression context
+
+ if (ETraceLevel::TRACE_DETAIL <= StdDbgLevel()) { // the AST is dumped to Cdbg only at detailed trace level
+ Cdbg << "Before optimization:" << Endl;
+ astRes.Root->PrettyPrintTo(Cdbg, TAstPrintFlags::PerLine | TAstPrintFlags::ShortQuote | TAstPrintFlags::AdaptArbitraryContent);
+ }
+
+ // Translate AST into expression
+
+ TExprNode::TPtr exprRoot;
+ if (!CompileExpr(*astRes.Root, exprRoot, ExprContext_, moduleResolver.get(), 0, syntaxVersion)) {
+ TStringStream astStr;
+ astRes.Root->PrettyPrintTo(astStr, TAstPrintFlags::ShortQuote | TAstPrintFlags::PerLine);
+ ythrow TCompileError(astStr.Str(), ExprContext_.IssueManager.GetIssues().ToString()) << "failed to compile";
+ }
+
+ // Prepare transformation pipeline
+ // calcTransformer is the callback passed to AddExpressionEvaluation below: it peephole-optimizes the head of the node, builds a TWorkerGraph for it, computes the value and stores it on the node as a YSON atom of the form {Data = <value>}.
+ THolder<IGraphTransformer> calcTransformer = CreateFunctorTransformer([&](TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx)
+ -> IGraphTransformer::TStatus
+ {
+ output = input;
+ auto valueNode = input->HeadPtr();
+
+ auto peepHole = MakePeepholeOptimization(typeContext);
+ auto status = SyncTransform(*peepHole, valueNode, ctx);
+ if (status != IGraphTransformer::TStatus::Ok) {
+ return status;
+ }
+
+ TStringStream out;
+ NYson::TYsonWriter writer(&out, NYson::EYsonFormat::Text, ::NYson::EYsonType::Node, true);
+ writer.OnBeginMap();
+
+ writer.OnKeyedItem("Data");
+
+ TWorkerGraph graph(
+ valueNode,
+ ctx,
+ {},
+ *FuncRegistry_,
+ UserData_,
+ {},
+ {},
+ valueNode->GetTypeAnn(),
+ LLVMSettings_,
+ CountersProvider_,
+ NativeYtTypeFlags_,
+ DeterministicTimeProviderSeed_
+ );
+
+ with_lock (graph.ScopedAlloc_) { // the value is computed and serialized while the graph's allocator is locked
+ const auto value = graph.ComputationGraph_->GetValue();
+ NCommon::WriteYsonValue(writer, value, const_cast<NKikimr::NMiniKQL::TType*>(graph.OutputType_), nullptr);
+ }
+ writer.OnEndMap();
+
+ auto ysonAtom = ctx.NewAtom(TPositionHandle(), out.Str());
+ input->SetResult(std::move(ysonAtom));
+ return IGraphTransformer::TStatus::Ok;
+ });
+
+ TTransformationPipeline pipeline(typeContext);
+
+ pipeline.Add(MakeTableReadsReplacer(InputTypes_.size(), UseSystemColumns_),
+ "ReplaceTableReads", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR,
+ "Replace reads from tables");
+ pipeline.AddServiceTransformers();
+ pipeline.AddPreTypeAnnotation();
+ pipeline.AddExpressionEvaluation(*FuncRegistry_, calcTransformer.Get());
+ pipeline.AddIOAnnotation();
+ pipeline.AddTypeAnnotationTransformer(MakeTypeAnnotationTransformer(typeContext, InputTypes_, processorMode));
+ pipeline.AddPostTypeAnnotation();
+ pipeline.Add(CreateFunctorTransformer(
+ [&](const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) {
+ return OptimizeExpr(input, output, [](const TExprNode::TPtr& node, TExprContext&) -> TExprNode::TPtr {
+ if (node->IsCallable("Unordered") && node->Child(0)->IsCallable(PurecalcInputCallableName)) { // Unordered wrapped directly around the purecalc input callable is replaced by that callable
+ return node->ChildPtr(0);
+ }
+ return node;
+ }, ctx, TOptimizeExprSettings(nullptr));
+ }), "Unordered", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR,
+ "Unordered optimizations");
+ pipeline.Add(MakeOutputColumnsFilter(outputSpec.GetOutputColumnsFilter()),
+ "Filter", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR,
+ "Filter output columns");
+ pipeline.Add(MakeOutputAligner(OutputType_, processorMode),
+ "Convert", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR,
+ "Align return type of the program to output schema");
+ pipeline.AddCommonOptimization();
+ pipeline.AddFinalCommonOptimization();
+ pipeline.Add(MakeUsedColumnsExtractor(&UsedColumns_, AllColumns_),
+ "ExtractColumns", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR,
+ "Extract used columns");
+ pipeline.Add(MakePeepholeOptimization(typeContext),
+ "PeepHole", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR,
+ "Peephole optimizations");
+ pipeline.AddCheckExecution(false);
+
+ // Apply optimizations
+ // Transformer statistics are collected and logged before the status check, so they are reported for failed runs as well.
+ auto transformer = pipeline.Build();
+ auto status = SyncTransform(*transformer, exprRoot, ExprContext_);
+ auto transformStats = transformer->GetStatistics();
+ TStringStream out;
+ NYson::TYsonWriter writer(&out, NYson::EYsonFormat::Pretty);
+ NCommon::TransformerStatsToYson("", transformStats, writer);
+ YQL_CLOG(DEBUG, Core) << "Transform stats: " << out.Str();
+ if (status == IGraphTransformer::TStatus::Error) { // only the Error status aborts compilation
+ ythrow TCompileError("", ExprContext_.IssueManager.GetIssues().ToString()) << "Failed to optimize";
+ }
+
+ if (ETraceLevel::TRACE_DETAIL <= StdDbgLevel()) {
+ Cdbg << "After optimization:" << Endl;
+ ConvertToAst(*exprRoot, ExprContext_, 0, true).Root->PrettyPrintTo(Cdbg, TAstPrintFlags::PerLine | TAstPrintFlags::ShortQuote | TAstPrintFlags::AdaptArbitraryContent);
+ }
+ return exprRoot;
+}
+
+// Returns the schema of input #inputIndex (including its virtual columns) as a YSON node.
+// Fails when inputIndex is not a valid input number.
+template <typename TBase>
+NYT::TNode TWorkerFactory<TBase>::MakeInputSchema(ui32 inputIndex) const {
+    Y_ENSURE(inputIndex < InputTypes_.size(), "invalid input index (" << inputIndex << ") in MakeInputSchema call");
+
+    const auto* requestedType = InputTypes_[inputIndex];
+    return NCommon::TypeToYsonNode(requestedType);
+}
+
+// Single-input shortcut: returns the schema of the only input as a YSON node.
+// Fails when the program does not have exactly one input.
+template <typename TBase>
+NYT::TNode TWorkerFactory<TBase>::MakeInputSchema() const {
+    Y_ENSURE(InputTypes_.size() == 1, "MakeInputSchema() can be used only with single-input programs");
+
+    const auto* onlyInputType = InputTypes_[0];
+    return NCommon::TypeToYsonNode(onlyInputType);
+}
+
+// Returns the output schema of a single-output program as a YSON node.
+// Fails when no output type is known or when the output type is not a struct.
+template <typename TBase>
+NYT::TNode TWorkerFactory<TBase>::MakeOutputSchema() const {
+    Y_ENSURE(OutputType_, "MakeOutputSchema() cannot be used with precompiled programs");
+    Y_ENSURE(OutputType_->GetKind() == ETypeAnnotationKind::Struct, "MakeOutputSchema() cannot be used with multi-output programs");
+
+    const auto* structOutput = OutputType_;
+    return NCommon::TypeToYsonNode(structOutput);
+}
+
+// Returns the schema of output table #index for programs whose output is a variant over a tuple.
+// Fails when no output type is known, when the output is not such a variant, or when index
+// is outside the tuple.
+template <typename TBase>
+NYT::TNode TWorkerFactory<TBase>::MakeOutputSchema(ui32 index) const {
+    Y_ENSURE(OutputType_, "MakeOutputSchema() cannot be used with precompiled programs");
+    Y_ENSURE(OutputType_->GetKind() == ETypeAnnotationKind::Variant, "MakeOutputSchema(ui32) cannot be used with single-output programs");
+
+    const auto* alternatives = OutputType_->template Cast<TVariantExprType>()->GetUnderlyingType();
+    Y_ENSURE(alternatives->GetKind() == ETypeAnnotationKind::Tuple, "MakeOutputSchema(ui32) cannot be used to process variants over struct");
+
+    const auto* tables = alternatives->template Cast<TTupleExprType>();
+    Y_ENSURE(index < tables->GetSize(), "Invalid table index " << index);
+
+    return NCommon::TypeToYsonNode(tables->GetItems()[index]);
+}
+
+// Returns the schema of the output table called tableName for programs whose output is a
+// variant over a struct. Fails when no output type is known, when the output is not such a
+// variant, or when the struct has no member with that name.
+template <typename TBase>
+NYT::TNode TWorkerFactory<TBase>::MakeOutputSchema(TStringBuf tableName) const {
+    Y_ENSURE(OutputType_, "MakeOutputSchema() cannot be used with precompiled programs");
+    Y_ENSURE(OutputType_->GetKind() == ETypeAnnotationKind::Variant, "MakeOutputSchema(TStringBuf) cannot be used with single-output programs");
+
+    const auto* alternatives = OutputType_->template Cast<TVariantExprType>()->GetUnderlyingType();
+    Y_ENSURE(alternatives->GetKind() == ETypeAnnotationKind::Struct, "MakeOutputSchema(TStringBuf) cannot be used to process variants over tuple");
+
+    const auto* tables = alternatives->template Cast<TStructExprType>();
+    const auto position = tables->FindItem(tableName);
+    Y_ENSURE(position.Defined(), "Invalid table index " << TString{tableName}.Quote());
+
+    const auto* tableItem = tables->GetItems()[*position];
+    return NCommon::TypeToYsonNode(tableItem->GetItemType());
+}
+
+// Returns the whole output type as a YSON node, whatever its kind.
+// Fails when no output type is known.
+template <typename TBase>
+NYT::TNode TWorkerFactory<TBase>::MakeFullOutputSchema() const {
+    Y_ENSURE(OutputType_, "MakeFullOutputSchema() cannot be used with precompiled programs");
+
+    const auto* fullOutput = OutputType_;
+    return NCommon::TypeToYsonNode(fullOutput);
+}
+
+// Returns the set of columns recorded as used for input #inputIndex.
+// Fails when there is no such entry in UsedColumns_.
+template <typename TBase>
+const THashSet<TString>& TWorkerFactory<TBase>::GetUsedColumns(ui32 inputIndex) const {
+    Y_ENSURE(inputIndex < UsedColumns_.size(), "invalid input index (" << inputIndex << ") in GetUsedColumns call");
+
+    const auto& requestedColumns = UsedColumns_[inputIndex];
+    return requestedColumns;
+}
+
+// Single-input shortcut: returns the set of columns recorded as used for the only input.
+// Fails when UsedColumns_ does not hold exactly one entry.
+template <typename TBase>
+const THashSet<TString>& TWorkerFactory<TBase>::GetUsedColumns() const {
+    Y_ENSURE(UsedColumns_.size() == 1, "GetUsedColumns() can be used only with single-input programs");
+
+    const auto& onlyInputColumns = UsedColumns_[0];
+    return onlyInputColumns;
+}
+
+// Returns the completed issues accumulated in the factory's expression context.
+template <typename TBase>
+TIssues TWorkerFactory<TBase>::GetIssues() const {
+    auto& issueManager = ExprContext_.IssueManager;
+    return issueManager.GetCompletedIssues();
+}
+
+// Returns the program in serialized form.
+// When the factory holds a compiled expression it is lowered with CompileMkql and serialized on
+// every call; otherwise the stored serialized program is returned as is.
+template <typename TBase>
+TString TWorkerFactory<TBase>::GetCompiledProgram() {
+    if (!ExprRoot_) {
+        return SerializedProgram_;
+    }
+
+    // Allocator and type environment live only for the duration of this call.
+    NKikimr::NMiniKQL::TScopedAlloc scopedAlloc(
+        __LOCATION__,
+        NKikimr::TAlignedPagePoolCounters(),
+        FuncRegistry_->SupportsSizedAllocators());
+    NKikimr::NMiniKQL::TTypeEnvironment typeEnv(scopedAlloc);
+
+    auto programRoot = CompileMkql(ExprRoot_, ExprContext_, *FuncRegistry_, typeEnv, UserData_);
+    return NKikimr::NMiniKQL::SerializeRuntimeNode(programRoot, typeEnv);
+}
+
+// Takes ownership of a worker handed back to the factory.
+// With the worker pool enabled the worker is parked for reuse; otherwise it is destroyed here.
+template <typename TBase>
+void TWorkerFactory<TBase>::ReturnWorker(IWorker* worker) {
+    THolder<IWorker> owned(worker);
+    if (!UseWorkerPool_) {
+        return; // `owned` goes out of scope and deletes the worker
+    }
+    WorkerPool_.push_back(std::move(owned));
+}
+
+
+// Defines T<MODE>WorkerFactory::MakeWorker().
+//
+// A pooled worker is reused when the pool is not empty; otherwise a new T<MODE>Worker is
+// created from the factory's compiled program and settings.
+//
+// Pooled workers are stored as THolder<IWorker>, so the pointer has to be converted back to
+// I<MODE>Worker. A static_cast is used for that: unlike a C-style cast it is rejected by the
+// compiler if the two types are not related by inheritance, instead of silently turning into
+// a reinterpret_cast.
+#define DEFINE_WORKER_MAKER(MODE) \
+    TWorkerHolder<I##MODE##Worker> T##MODE##WorkerFactory::MakeWorker() { \
+        if (!WorkerPool_.empty()) { \
+            auto res = std::move(WorkerPool_.back()); \
+            WorkerPool_.pop_back(); \
+            return TWorkerHolder<I##MODE##Worker>(static_cast<I##MODE##Worker*>(res.Release())); \
+        } \
+        return TWorkerHolder<I##MODE##Worker>(new T##MODE##Worker( \
+            weak_from_this(), \
+            ExprRoot_, \
+            ExprContext_, \
+            SerializedProgram_, \
+            *FuncRegistry_, \
+            UserData_, \
+            InputTypes_, \
+            OriginalInputTypes_, \
+            OutputType_, \
+            LLVMSettings_, \
+            CountersProvider_, \
+            NativeYtTypeFlags_, \
+            DeterministicTimeProviderSeed_ \
+        )); \
+    }
+
+DEFINE_WORKER_MAKER(PullStream)
+DEFINE_WORKER_MAKER(PullList)
+DEFINE_WORKER_MAKER(PushStream)
+
+// The helper macro is only meaningful for the three definitions above.
+#undef DEFINE_WORKER_MAKER
+
+// Explicit instantiations: the member functions of TWorkerFactory are defined in this
+// translation unit, so every factory interface in use is instantiated here.
+namespace NYql::NPureCalc {
+    template class TWorkerFactory<IPullStreamWorkerFactory>;
+    template class TWorkerFactory<IPullListWorkerFactory>;
+    template class TWorkerFactory<IPushStreamWorkerFactory>;
+}
diff --git a/ydb/library/yql/public/purecalc/common/worker_factory.h b/ydb/library/yql/public/purecalc/common/worker_factory.h
new file mode 100644
index 0000000000..901e20fe88
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/worker_factory.h
@@ -0,0 +1,157 @@
+#pragma once
+
+#include <ydb/library/yql/public/purecalc/common/interface.h>
+
+#include "processor_mode.h"
+
+#include <util/generic/ptr.h>
+#include <ydb/library/yql/ast/yql_expr.h>
+#include <ydb/library/yql/core/yql_user_data.h>
+#include <ydb/library/yql/minikql/mkql_function_registry.h>
+#include <ydb/library/yql/core/yql_type_annotation.h>
+#include <utility>
+
+namespace NYql {
+ namespace NPureCalc {
+ struct TWorkerFactoryOptions { // bundle of everything the TWorkerFactory constructor consumes
+ IProgramFactoryPtr Factory; // moved into the worker factory's Factory_ member
+ const TInputSpecBase& InputSpec; // held by reference; queried for input schemas and virtual columns
+ const TOutputSpecBase& OutputSpec; // held by reference; queried for the output schema and the output columns filter
+ TStringBuf Query; // view of the program text, interpreted according to TranslationMode_
+ TIntrusivePtr<NKikimr::NMiniKQL::IMutableFunctionRegistry> FuncRegistry;
+ IModuleResolver::TPtr ModuleResolver;
+ const TUserDataTable& UserData; // held by reference here and in TWorkerFactory::UserData_ — NOTE(review): the table must outlive both; confirm that callers guarantee this
+ const THashMap<TString, TString>& Modules; // held by reference; copied into the SQL translation settings during compilation
+ TString LLVMSettings;
+ NKikimr::NUdf::ICountersProvider* CountersProvider_; // raw pointer, presumably non-owning — TODO confirm
+ ETranslationMode TranslationMode_; // Mkql: Query is stored as the serialized program; otherwise Query is compiled as SQL or as an s-expression
+ ui16 SyntaxVersion_; // SQL syntax version; 0 marks the query as deprecated SQL
+ ui64 NativeYtTypeFlags_;
+ TMaybe<ui64> DeterministicTimeProviderSeed_; // when set, a deterministic time provider seeded with this value replaces the default one
+ bool UseSystemColumns; // when false, inputs exposing columns with the purecalc system prefix are rejected
+ bool UseWorkerPool; // when true, workers returned to the factory are kept for reuse
+ // Member-wise constructor: each parameter initializes the field listed at the same position above (several parameters reuse the field's name).
+ TWorkerFactoryOptions(
+ IProgramFactoryPtr Factory,
+ const TInputSpecBase& InputSpec,
+ const TOutputSpecBase& OutputSpec,
+ TStringBuf Query,
+ TIntrusivePtr<NKikimr::NMiniKQL::IMutableFunctionRegistry> FuncRegistry,
+ IModuleResolver::TPtr ModuleResolver,
+ const TUserDataTable& UserData,
+ const THashMap<TString, TString>& Modules,
+ TString LLVMSettings,
+ NKikimr::NUdf::ICountersProvider* CountersProvider,
+ ETranslationMode translationMode,
+ ui16 syntaxVersion,
+ ui64 nativeYtTypeFlags,
+ TMaybe<ui64> deterministicTimeProviderSeed,
+ bool useSystemColumns,
+ bool useWorkerPool
+ )
+ : Factory(std::move(Factory)) // inside the initializers the bare name refers to the parameter, not the member
+ , InputSpec(InputSpec)
+ , OutputSpec(OutputSpec)
+ , Query(Query)
+ , FuncRegistry(std::move(FuncRegistry))
+ , ModuleResolver(std::move(ModuleResolver))
+ , UserData(UserData)
+ , Modules(Modules)
+ , LLVMSettings(std::move(LLVMSettings))
+ , CountersProvider_(CountersProvider)
+ , TranslationMode_(translationMode)
+ , SyntaxVersion_(syntaxVersion)
+ , NativeYtTypeFlags_(nativeYtTypeFlags)
+ , DeterministicTimeProviderSeed_(deterministicTimeProviderSeed)
+ , UseSystemColumns(useSystemColumns)
+ , UseWorkerPool(useWorkerPool)
+ {
+ }
+ };
+
+ template <typename TBase> // TBase: the factory interface being implemented, e.g. IPullStreamWorkerFactory
+ class TWorkerFactory: public TBase {
+ private:
+ IProgramFactoryPtr Factory_;
+
+ protected:
+ TIntrusivePtr<NKikimr::NMiniKQL::IMutableFunctionRegistry> FuncRegistry_;
+ const TUserDataTable& UserData_; // reference to a table owned elsewhere — NOTE(review): it must outlive the factory; confirm with callers
+ TExprContext ExprContext_; // context used for type construction, compilation and transformations
+ TExprNode::TPtr ExprRoot_; // root of the compiled program; not assigned in Mkql translation mode
+ TString SerializedProgram_; // assigned only in Mkql translation mode (the query text itself)
+ TVector<const TStructExprType*> InputTypes_; // per input: declared struct extended with virtual columns
+ TVector<const TStructExprType*> OriginalInputTypes_; // per input: struct exactly as declared by the schema
+ const TTypeAnnotationNode* OutputType_; // may stay nullptr in Mkql mode when the output spec has no schema
+ TVector<THashSet<TString>> AllColumns_; // per input: names of all columns of the extended struct
+ TVector<THashSet<TString>> UsedColumns_; // filled by the used-columns extractor stage of Compile()
+ TString LLVMSettings_;
+ NKikimr::NUdf::ICountersProvider* CountersProvider_;
+ ui64 NativeYtTypeFlags_;
+ TMaybe<ui64> DeterministicTimeProviderSeed_;
+ bool UseSystemColumns_;
+ bool UseWorkerPool_;
+ TVector<THolder<IWorker>> WorkerPool_; // workers handed back through ReturnWorker() while UseWorkerPool_ is set
+
+ public:
+ TWorkerFactory(TWorkerFactoryOptions, EProcessorMode);
+
+ public:
+ NYT::TNode MakeInputSchema(ui32) const override;
+ NYT::TNode MakeInputSchema() const override;
+ NYT::TNode MakeOutputSchema() const override;
+ NYT::TNode MakeOutputSchema(ui32) const override;
+ NYT::TNode MakeOutputSchema(TStringBuf) const override;
+ NYT::TNode MakeFullOutputSchema() const override;
+ const THashSet<TString>& GetUsedColumns(ui32 inputIndex) const override;
+ const THashSet<TString>& GetUsedColumns() const override;
+ TIssues GetIssues() const override;
+ TString GetCompiledProgram() override;
+
+ protected:
+ void ReturnWorker(IWorker* worker) override; // takes ownership of the worker
+
+ private:
+ TExprNode::TPtr Compile(TStringBuf query, // parses, compiles and optimizes the program; used by the constructor
+ bool sql,
+ IModuleResolver::TPtr moduleResolver,
+ ui16 syntaxVersion,
+ const THashMap<TString, TString>& modules,
+ const TOutputSpecBase& outputSpec,
+ EProcessorMode processorMode);
+ };
+
+ // Worker factory bound to the pull-stream processor mode.
+ class TPullStreamWorkerFactory final: public TWorkerFactory<IPullStreamWorkerFactory> {
+ public:
+     // Forwards the options to the base together with EProcessorMode::PullStream.
+     explicit TPullStreamWorkerFactory(TWorkerFactoryOptions options)
+         : TWorkerFactory(std::move(options), EProcessorMode::PullStream)
+     {}
+
+     TWorkerHolder<IPullStreamWorker> MakeWorker() override;
+ };
+
+ // Worker factory bound to the pull-list processor mode.
+ class TPullListWorkerFactory final: public TWorkerFactory<IPullListWorkerFactory> {
+ public:
+     // Forwards the options to the base together with EProcessorMode::PullList.
+     explicit TPullListWorkerFactory(TWorkerFactoryOptions options)
+         : TWorkerFactory(std::move(options), EProcessorMode::PullList)
+     {}
+
+     TWorkerHolder<IPullListWorker> MakeWorker() override;
+ };
+
+ // Worker factory bound to the push-stream processor mode.
+ class TPushStreamWorkerFactory final: public TWorkerFactory<IPushStreamWorkerFactory> {
+ public:
+     // Forwards the options to the base together with EProcessorMode::PushStream.
+     explicit TPushStreamWorkerFactory(TWorkerFactoryOptions options)
+         : TWorkerFactory(std::move(options), EProcessorMode::PushStream)
+     {}
+
+     TWorkerHolder<IPushStreamWorker> MakeWorker() override;
+ };
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/common/wrappers.cpp b/ydb/library/yql/public/purecalc/common/wrappers.cpp
new file mode 100644
index 0000000000..c808d7b394
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/wrappers.cpp
@@ -0,0 +1 @@
+#include "wrappers.h"
diff --git a/ydb/library/yql/public/purecalc/common/wrappers.h b/ydb/library/yql/public/purecalc/common/wrappers.h
new file mode 100644
index 0000000000..4d65e01271
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/wrappers.h
@@ -0,0 +1,70 @@
+#pragma once
+
+#include "fwd.h"
+
+#include <util/generic/ptr.h>
+
+namespace NYql::NPureCalc::NPrivate {
+ // Stream adapter: owns an IStream<TOld> and exposes it as an IStream<TNew>.
+ // Every Fetch() pulls one item from the wrapped stream and returns the functor's result for it.
+ template <typename TNew, typename TOld, typename TFunctor>
+ class TMappingStream final: public IStream<TNew> {
+ public:
+     TMappingStream(THolder<IStream<TOld>> old, TFunctor functor)
+         : Old_(std::move(old))
+         , Functor_(std::move(functor))
+     {}
+
+     TNew Fetch() override {
+         return Functor_(Old_->Fetch());
+     }
+
+ private:
+     THolder<IStream<TOld>> Old_; // wrapped stream
+     TFunctor Functor_;           // maps a fetched TOld item to TNew
+ };
+
+ // Consumer adapter: owns an IConsumer<TOld> and exposes it as an IConsumer<TNew>.
+ // Every received object is passed through the functor before it reaches the wrapped consumer;
+ // the end-of-stream notification is forwarded unchanged.
+ template <typename TNew, typename TOld, typename TFunctor>
+ class TMappingConsumer final: public IConsumer<TNew> {
+ public:
+     TMappingConsumer(THolder<IConsumer<TOld>> old, TFunctor functor)
+         : Old_(std::move(old))
+         , Functor_(std::move(functor))
+     {}
+
+     void OnObject(TNew object) override {
+         Old_->OnObject(Functor_(object));
+     }
+
+     void OnFinish() override {
+         Old_->OnFinish();
+     }
+
+ private:
+     THolder<IConsumer<TOld>> Old_; // wrapped consumer
+     TFunctor Functor_;             // maps an incoming TNew object to TOld
+ };
+
+ // Consumer that forwards every call to another consumer reached through a pointer-like
+ // handle of type C (it is dereferenced with operator->). The handle is stored by copy.
+ template <typename T, typename C>
+ class TNonOwningConsumer final: public IConsumer<T> {
+ public:
+     explicit TNonOwningConsumer(const C& consumer)
+         : Consumer_(consumer)
+     {}
+
+     void OnObject(T t) override {
+         Consumer_->OnObject(t);
+     }
+
+     void OnFinish() override {
+         Consumer_->OnFinish();
+     }
+
+ private:
+     C Consumer_; // handle to the target consumer
+ };
+}
diff --git a/ydb/library/yql/public/purecalc/common/ya.make b/ydb/library/yql/public/purecalc/common/ya.make
new file mode 100644
index 0000000000..0994915641
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/common/ya.make
@@ -0,0 +1,47 @@
+LIBRARY()
+# Translation units of the purecalc common library.
+SRCS(
+ compile_mkql.cpp
+ fwd.cpp
+ inspect_input.cpp
+ interface.cpp
+ logger_init.cpp
+ names.cpp
+ processor_mode.cpp
+ program_factory.cpp
+ transformations/align_output_schema.cpp
+ transformations/extract_used_columns.cpp
+ transformations/output_columns_filter.cpp
+ transformations/replace_table_reads.cpp
+ transformations/type_annotation.cpp
+ type_from_schema.cpp
+ worker.cpp
+ worker_factory.cpp
+ wrappers.cpp
+)
+# Modules this library depends on.
+PEERDIR(
+ ydb/library/yql/sql/pg
+ ydb/library/yql/ast
+ ydb/library/yql/core/services
+ ydb/library/yql/core/services/mounts
+ ydb/library/yql/core/user_data
+ ydb/library/yql/minikql/comp_nodes/llvm
+ ydb/library/yql/utils/backtrace
+ ydb/library/yql/utils/log
+ ydb/library/yql/core
+ ydb/library/yql/core/type_ann
+ ydb/library/yql/parser/pg_wrapper
+ ydb/library/yql/providers/common/codec
+ ydb/library/yql/providers/common/comp_nodes
+ ydb/library/yql/providers/common/mkql
+ ydb/library/yql/providers/common/provider
+ ydb/library/yql/providers/common/schema/expr
+ ydb/library/yql/providers/common/udf_resolve
+)
+# NOTE(review): presumably builds the module against the latest UDF ABI version — confirm against the macro's definition.
+YQL_LAST_ABI_VERSION()
+# NOTE(review): presumably generates string conversion code for the enums declared in interface.h — confirm against the macro's documentation.
+GENERATE_ENUM_SERIALIZATION(interface.h)
+
+END()
diff --git a/ydb/library/yql/public/purecalc/examples/CMakeLists.txt b/ydb/library/yql/public/purecalc/examples/CMakeLists.txt
new file mode 100644
index 0000000000..ad9eebe96e
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/CMakeLists.txt
@@ -0,0 +1,11 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+# One subdirectory per purecalc example program.
+add_subdirectory(protobuf)
+add_subdirectory(protobuf_pull_list)
+add_subdirectory(skiff_pull_list)
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..711c146299
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,64 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+# Tool lookups for protoc and its cpp_styleguide plugin (get_built_tool_path is a project helper defined outside this file).
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+# The `protobuf` example executable, macOS x86_64 flavour.
+add_executable(protobuf)
+target_compile_options(protobuf PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(protobuf PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ yql-public-purecalc
+ purecalc-io_specs-protobuf
+ purecalc-helpers-stream
+ contrib-libs-protobuf
+)
+target_link_options(protobuf PRIVATE
+ -Wl,-platform_version,macos,11.0,11.0
+ -fPIC
+ -fPIC # repeated as emitted by the generator
+ -framework # takes the following argument (CoreFoundation) as its value
+ CoreFoundation
+)
+target_proto_messages(protobuf PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf/main.proto
+)
+target_sources(protobuf PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf/main.cpp
+)
+target_allocator(protobuf
+ system_allocator
+)
+target_proto_addincls(protobuf
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR} # this and the next entry repeat earlier ones, as emitted by the generator
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(protobuf
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
+vcs_info(protobuf)
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..6bc0ca6ea0
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,67 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+# Tool lookups for protoc and its cpp_styleguide plugin (get_built_tool_path is a project helper defined outside this file).
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+# The `protobuf` example executable, Linux aarch64 flavour.
+add_executable(protobuf)
+target_compile_options(protobuf PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(protobuf PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-purecalc
+ purecalc-io_specs-protobuf
+ purecalc-helpers-stream
+ contrib-libs-protobuf
+)
+target_link_options(protobuf PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -fPIC # repeated as emitted by the generator
+ -lpthread
+ -lrt # -lrt and -ldl repeat the first two entries, as emitted by the generator
+ -ldl
+)
+target_proto_messages(protobuf PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf/main.proto
+)
+target_sources(protobuf PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf/main.cpp
+)
+target_allocator(protobuf
+ cpp-malloc-jemalloc
+)
+target_proto_addincls(protobuf
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR} # this and the next entry repeat earlier ones, as emitted by the generator
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(protobuf
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
+vcs_info(protobuf)
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..9a176f6229
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,69 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+# Tool lookups for protoc and its cpp_styleguide plugin (get_built_tool_path is a project helper defined outside this file).
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+# The `protobuf` example executable, Linux x86_64 flavour.
+add_executable(protobuf)
+target_compile_options(protobuf PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(protobuf PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ yql-public-purecalc
+ purecalc-io_specs-protobuf
+ purecalc-helpers-stream
+ contrib-libs-protobuf
+)
+target_link_options(protobuf PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -fPIC # repeated as emitted by the generator
+ -lpthread
+ -lrt # -lrt and -ldl repeat the first two entries, as emitted by the generator
+ -ldl
+)
+target_proto_messages(protobuf PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf/main.proto
+)
+target_sources(protobuf PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf/main.cpp
+)
+target_allocator(protobuf
+ cpp-malloc-tcmalloc
+ libs-tcmalloc-no_percpu_cache
+)
+target_proto_addincls(protobuf
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR} # this and the next entry repeat earlier ones, as emitted by the generator
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(protobuf
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
+vcs_info(protobuf)
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.txt b/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+# Include the platform-specific fragment matching the target OS/CPU pair. The Linux and Windows branches also require HAVE_CUDA to be unset; there is no else() branch, so any other configuration includes nothing.
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..d6221c45d2
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,57 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_executable(protobuf)
+target_compile_options(protobuf PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(protobuf PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ yql-public-purecalc
+ purecalc-io_specs-protobuf
+ purecalc-helpers-stream
+ contrib-libs-protobuf
+)
+target_proto_messages(protobuf PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf/main.proto
+)
+target_sources(protobuf PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf/main.cpp
+)
+target_allocator(protobuf
+ system_allocator
+)
+target_proto_addincls(protobuf
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(protobuf
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
+vcs_info(protobuf)
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/main.cpp b/ydb/library/yql/public/purecalc/examples/protobuf/main.cpp
new file mode 100644
index 0000000000..8ce3692766
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/protobuf/main.cpp
@@ -0,0 +1,133 @@
+#include <ydb/library/yql/public/purecalc/examples/protobuf/main.pb.h>
+
+#include <ydb/library/yql/public/purecalc/purecalc.h>
+#include <ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h>
+#include <ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.h>
+
+using namespace NYql::NPureCalc;
+using namespace NExampleProtos;
+
+void PullStreamExample(IProgramFactoryPtr);
+void PushStreamExample(IProgramFactoryPtr);
+void PrecompileExample(IProgramFactoryPtr factory);
+THolder<IStream<TInput*>> MakeInput();
+
+// Consumer used by the push-stream example: writes every received row and a final "end" line to Cout.
+class TConsumer: public IConsumer<TOutput*> {
+public:
+    void OnObject(TOutput* row) override {
+        Cout << "path = " << row->GetPath() << Endl
+             << "host = " << row->GetHost() << Endl;
+    }
+    void OnFinish() override {
+        Cout << "end" << Endl;
+    }
+};
+
+const char* Query = R"(
+ $a = (SELECT * FROM Input);
+ $b = (SELECT CAST(Url::GetTail(Url) AS Utf8) AS Path, CAST(Url::GetHost(Url) AS Utf8) AS Host, Ip FROM $a);
+ $c = (SELECT Path, Host FROM $b WHERE Path IS NOT NULL AND Host IS NOT NULL AND Ip::IsIPv4(Ip::FromString(Ip)));
+ $d = (SELECT Unwrap(Path) AS Path, Unwrap(Host) AS Host FROM $c);
+ SELECT * FROM $d;
+)";
+
+int main(int argc, char** argv) {
+ try {
+ auto factory = MakeProgramFactory(
+ TProgramFactoryOptions().SetUDFsDir(argc > 1 ? argv[1] : "../../../../udfs"));
+
+ Cout << "Pull stream:" << Endl;
+ PullStreamExample(factory);
+
+ Cout << Endl;
+ Cout << "Push stream:" << Endl;
+ PushStreamExample(factory);
+
+ Cout << Endl;
+ Cout << "Pull stream with pre-compilation:" << Endl;
+ PrecompileExample(factory);
+ } catch (const TCompileError& err) {
+ Cerr << err.GetIssues() << Endl;
+ Cerr << err.what() << Endl;
+ }
+}
+
+// Compiles Query in pull-stream mode, applies the program to MakeInput()
+// and prints every row it yields.
+void PullStreamExample(IProgramFactoryPtr factory) {
+    auto pullProgram = factory->MakePullStreamProgram(
+        TProtobufInputSpec<TInput>(), TProtobufOutputSpec<TOutput>(),
+        Query, ETranslationMode::SQL);
+    auto rows = pullProgram->Apply(MakeInput());
+
+    // Stop at the first null pointer returned by Fetch().
+    for (auto* row = rows->Fetch(); row != nullptr; row = rows->Fetch()) {
+        Cout << "path = " << row->GetPath() << Endl
+             << "host = " << row->GetHost() << Endl;
+    }
+}
+
+// Compiles Query in push-stream mode and pushes the rows of MakeInput() into it one by one.
+// The TConsumer handed to Apply() is what prints the output of this example.
+void PushStreamExample(IProgramFactoryPtr factory) {
+    auto pushProgram = factory->MakePushStreamProgram(
+        TProtobufInputSpec<TInput>(), TProtobufOutputSpec<TOutput>(), Query, ETranslationMode::SQL);
+    auto sink = pushProgram->Apply(MakeHolder<TConsumer>());
+
+    auto rows = MakeInput();
+    for (auto* row = rows->Fetch(); row != nullptr; row = rows->Fetch()) {
+        sink->OnObject(row);
+    }
+
+    // OnFinish() is called exactly once, after the last row has been pushed.
+    sink->OnFinish();
+}
+
+// Pre-compilation example: Query is compiled from SQL only to obtain its compiled form,
+// then a second program is built from that form in Mkql mode and run on MakeInput().
+void PrecompileExample(IProgramFactoryPtr factory) {
+    // The SQL-mode program is a temporary: it is destroyed at the end of this statement,
+    // i.e. before the Mkql-mode program below is created.
+    const TString compiled = factory->MakePullStreamProgram(
+        TProtobufInputSpec<TInput>(),
+        TProtobufOutputSpec<TOutput>(),
+        Query,
+        ETranslationMode::SQL)->GetCompiledProgram();
+
+    auto precompiled = factory->MakePullStreamProgram(
+        TProtobufInputSpec<TInput>(),
+        TProtobufOutputSpec<TOutput>(),
+        compiled,
+        ETranslationMode::Mkql);
+
+    auto rows = precompiled->Apply(MakeInput());
+
+    // Print rows until Fetch() returns a null pointer.
+    for (auto* row = rows->Fetch(); row != nullptr; row = rows->Fetch()) {
+        Cout << "path = " << row->GetPath() << Endl
+             << "host = " << row->GetHost() << Endl;
+    }
+}
+
+// Builds the fixed three-row input (URL and IP per row) used by every example in this file.
+THolder<IStream<TInput*>> MakeInput() {
+    static const struct {
+        const char* Url;
+        const char* Ip;
+    } samples[] = {
+        {"https://news.yandex.ru/Moscow/index.html?from=index", "83.220.231.160"},
+        {"https://music.yandex.ru/radio/", "83.220.231.161"},
+        // The last row carries an IPv4-mapped IPv6 address; the canonized output lists only the first two rows.
+        {"https://yandex.ru/maps/?ll=141.475401%2C11.581666&spn=1.757813%2C1.733096&z=7&l=map%2Cstv%2Csta&mode=search&panorama%5Bpoint%5D=141.476317%2C11.582710&panorama%5Bdirection%5D=177.241445%2C-15.219821&panorama%5Bspan%5D=107.410156%2C61.993317", "::ffff:77.75.155.3"},
+    };
+
+    TVector<TInput> rows;
+    for (const auto& sample : samples) {
+        auto& row = rows.emplace_back();
+        row.SetUrl(sample.Url);
+        row.SetIp(sample.Ip);
+    }
+
+    return StreamFromVector(std::move(rows));
+}
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/main.proto b/ydb/library/yql/public/purecalc/examples/protobuf/main.proto
new file mode 100644
index 0000000000..54fd15e226
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/protobuf/main.proto
@@ -0,0 +1,15 @@
+syntax = "proto2";
+
+package NExampleProtos;
+
+// Input row of the example query: a URL and an IP address, both as strings.
+message TInput {
+    required string Url = 1;
+    required string Ip = 2;
+}
+
+// Output row of the example query: the Path and Host columns it selects.
+message TOutput {
+    required string Path = 1;
+    required string Host = 2;
+}
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/ut/canondata/exectest.run_protobuf_/log.out b/ydb/library/yql/public/purecalc/examples/protobuf/ut/canondata/exectest.run_protobuf_/log.out
new file mode 100644
index 0000000000..1ec34e485d
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/protobuf/ut/canondata/exectest.run_protobuf_/log.out
@@ -0,0 +1,18 @@
+Pull stream:
+path = /Moscow/index.html?from=index
+host = news.yandex.ru
+path = /radio/
+host = music.yandex.ru
+
+Push stream:
+path = /Moscow/index.html?from=index
+host = news.yandex.ru
+path = /radio/
+host = music.yandex.ru
+end
+
+Pull stream with pre-compilation:
+path = /Moscow/index.html?from=index
+host = news.yandex.ru
+path = /radio/
+host = music.yandex.ru
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/ut/canondata/result.json b/ydb/library/yql/public/purecalc/examples/protobuf/ut/canondata/result.json
new file mode 100644
index 0000000000..96a5814765
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/protobuf/ut/canondata/result.json
@@ -0,0 +1,5 @@
+{
+ "exectest.run[protobuf]": {
+ "uri": "file://exectest.run_protobuf_/log.out"
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/ut/ya.make b/ydb/library/yql/public/purecalc/examples/protobuf/ut/ya.make
new file mode 100644
index 0000000000..04c2feeba0
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/protobuf/ut/ya.make
@@ -0,0 +1,11 @@
+EXECTEST()
+
+RUN(protobuf ${ARCADIA_BUILD_ROOT}/yql/udfs STDOUT log.out CANONIZE_LOCALLY log.out)
+
+DEPENDS(
+ ydb/library/yql/public/purecalc/examples/protobuf
+ yql/udfs/common/url
+ yql/udfs/common/ip
+)
+
+END()
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/ya.make b/ydb/library/yql/public/purecalc/examples/protobuf/ya.make
new file mode 100644
index 0000000000..a03f259bff
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/protobuf/ya.make
@@ -0,0 +1,27 @@
+PROGRAM()
+
+SRCS(
+ main.proto
+ main.cpp
+)
+
+PEERDIR(
+ ydb/library/yql/public/purecalc
+ ydb/library/yql/public/purecalc/io_specs/protobuf
+ ydb/library/yql/public/purecalc/helpers/stream
+)
+
+
+ YQL_LAST_ABI_VERSION()
+
+
+END()
+
+RECURSE_ROOT_RELATIVE(
+ yql/udfs/common/url
+ yql/udfs/common/ip
+)
+
+RECURSE_FOR_TESTS(
+ ut
+)
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..d33d5a41fa
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,64 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_executable(protobuf_pull_list)
+target_compile_options(protobuf_pull_list PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(protobuf_pull_list PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ yql-public-purecalc
+ purecalc-io_specs-protobuf
+ purecalc-helpers-stream
+ contrib-libs-protobuf
+)
+target_link_options(protobuf_pull_list PRIVATE
+  -Wl,-platform_version,macos,11.0,11.0
+  -fPIC
+  -fPIC
+  # SHELL: keeps the flag and its argument paired when CMake de-duplicates link options.
+  "SHELL:-framework CoreFoundation"
+)
+target_proto_messages(protobuf_pull_list PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.proto
+)
+target_sources(protobuf_pull_list PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.cpp
+)
+target_allocator(protobuf_pull_list
+ system_allocator
+)
+target_proto_addincls(protobuf_pull_list
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(protobuf_pull_list
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
+vcs_info(protobuf_pull_list)
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..efb081bc44
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,67 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_executable(protobuf_pull_list)
+target_compile_options(protobuf_pull_list PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(protobuf_pull_list PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-purecalc
+ purecalc-io_specs-protobuf
+ purecalc-helpers-stream
+ contrib-libs-protobuf
+)
+target_link_options(protobuf_pull_list PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -fPIC
+ -lpthread
+ -lrt
+ -ldl
+)
+target_proto_messages(protobuf_pull_list PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.proto
+)
+target_sources(protobuf_pull_list PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.cpp
+)
+target_allocator(protobuf_pull_list
+ cpp-malloc-jemalloc
+)
+target_proto_addincls(protobuf_pull_list
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(protobuf_pull_list
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
+vcs_info(protobuf_pull_list)
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..f27cca0d51
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,69 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_executable(protobuf_pull_list)
+target_compile_options(protobuf_pull_list PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(protobuf_pull_list PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ yql-public-purecalc
+ purecalc-io_specs-protobuf
+ purecalc-helpers-stream
+ contrib-libs-protobuf
+)
+target_link_options(protobuf_pull_list PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -fPIC
+ -lpthread
+ -lrt
+ -ldl
+)
+target_proto_messages(protobuf_pull_list PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.proto
+)
+target_sources(protobuf_pull_list PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.cpp
+)
+target_allocator(protobuf_pull_list
+ cpp-malloc-tcmalloc
+ libs-tcmalloc-no_percpu_cache
+)
+target_proto_addincls(protobuf_pull_list
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(protobuf_pull_list
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
+vcs_info(protobuf_pull_list)
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.txt b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+# Include the platform-specific fragment matching the target OS/CPU pair. The Linux and Windows branches also require HAVE_CUDA to be unset; there is no else() branch, so any other configuration includes nothing.
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..e74c8255b6
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,57 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_executable(protobuf_pull_list)
+target_compile_options(protobuf_pull_list PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(protobuf_pull_list PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ yql-public-purecalc
+ purecalc-io_specs-protobuf
+ purecalc-helpers-stream
+ contrib-libs-protobuf
+)
+target_proto_messages(protobuf_pull_list PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.proto
+)
+target_sources(protobuf_pull_list PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.cpp
+)
+target_allocator(protobuf_pull_list
+ system_allocator
+)
+target_proto_addincls(protobuf_pull_list
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(protobuf_pull_list
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
+vcs_info(protobuf_pull_list)
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.cpp b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.cpp
new file mode 100644
index 0000000000..f10c2aa9be
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.cpp
@@ -0,0 +1,75 @@
+#include <ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.pb.h>
+
+#include <ydb/library/yql/public/purecalc/purecalc.h>
+#include <ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h>
+#include <ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.h>
+
+using namespace NYql::NPureCalc;
+using namespace NExampleProtos;
+
+const char* Query = R"(
+ SELECT
+ Url,
+ COUNT(*) AS Hits
+ FROM
+ Input
+ GROUP BY
+ Url
+ ORDER BY
+ Url
+)";
+
+THolder<IStream<TInput*>> MakeInput();
+
+int main() {
+ try {
+ auto factory = MakeProgramFactory();
+
+ auto program = factory->MakePullListProgram(
+ TProtobufInputSpec<TInput>(),
+ TProtobufOutputSpec<TOutput>(),
+ Query,
+ ETranslationMode::SQL
+ );
+
+ auto result = program->Apply(MakeInput());
+
+ while (auto* message = result->Fetch()) {
+ Cout << "url = " << message->GetUrl() << Endl;
+ Cout << "hits = " << message->GetHits() << Endl;
+ }
+ } catch (TCompileError& e) {
+ Cout << e.GetIssues();
+ }
+}
+
+THolder<IStream<TInput*>> MakeInput() { // Builds the fixed sample input for the example query.
+ TVector<TInput> input;
+ // Six rows, Url only: /a twice, /b three times, /c once, in the order below.
+ {
+ auto& message = input.emplace_back();
+ message.SetUrl("https://yandex.ru/a");
+ }
+ {
+ auto& message = input.emplace_back();
+ message.SetUrl("https://yandex.ru/a");
+ }
+ {
+ auto& message = input.emplace_back();
+ message.SetUrl("https://yandex.ru/b");
+ }
+ {
+ auto& message = input.emplace_back();
+ message.SetUrl("https://yandex.ru/c");
+ }
+ {
+ auto& message = input.emplace_back();
+ message.SetUrl("https://yandex.ru/b");
+ }
+ {
+ auto& message = input.emplace_back();
+ message.SetUrl("https://yandex.ru/b");
+ }
+ // The vector is moved (not copied) into StreamFromVector.
+ return StreamFromVector(std::move(input));
+}
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.proto b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.proto
new file mode 100644
index 0000000000..2766c4b8c0
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.proto
@@ -0,0 +1,14 @@
+syntax = "proto2";
+
+package NExampleProtos;
+
+// Input row of the example query: a single URL string.
+message TInput {
+    required string Url = 1;
+}
+
+// Output row of the example query: a URL and its COUNT(*) value.
+message TOutput {
+    required string Url = 1;
+    required uint64 Hits = 2;
+}
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/canondata/exectest.run_protobuf_pull_list_/log.out b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/canondata/exectest.run_protobuf_pull_list_/log.out
new file mode 100644
index 0000000000..0a799ed4b0
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/canondata/exectest.run_protobuf_pull_list_/log.out
@@ -0,0 +1,6 @@
+url = https://yandex.ru/a
+hits = 2
+url = https://yandex.ru/b
+hits = 3
+url = https://yandex.ru/c
+hits = 1
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/canondata/result.json b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/canondata/result.json
new file mode 100644
index 0000000000..668467cc85
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/canondata/result.json
@@ -0,0 +1,6 @@
+{
+ "exectest.run[protobuf_pull_list]": {
+ "checksum": "29bf513fe0ca6f81ae076213a1c7801c",
+ "uri": "file://exectest.run_protobuf_pull_list_/log.out"
+ }
+}
\ No newline at end of file
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/ya.make b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/ya.make
new file mode 100644
index 0000000000..011ee76699
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/ya.make
@@ -0,0 +1,9 @@
+EXECTEST()
+
+RUN(protobuf_pull_list STDOUT log.out CANONIZE_LOCALLY log.out)
+
+DEPENDS(
+ ydb/library/yql/public/purecalc/examples/protobuf_pull_list
+)
+
+END()
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ya.make b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ya.make
new file mode 100644
index 0000000000..953ff1bf92
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ya.make
@@ -0,0 +1,22 @@
+PROGRAM()
+
+SRCS(
+ main.proto
+ main.cpp
+)
+
+PEERDIR(
+ ydb/library/yql/public/purecalc
+ ydb/library/yql/public/purecalc/io_specs/protobuf
+ ydb/library/yql/public/purecalc/helpers/stream
+)
+
+
+ YQL_LAST_ABI_VERSION()
+
+
+END()
+
+RECURSE_FOR_TESTS(
+ ut
+)
diff --git a/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..d2cf21e855
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,34 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(skiff_pull_list)
+target_compile_options(skiff_pull_list PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(skiff_pull_list PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ yql-public-purecalc
+ purecalc-io_specs-mkql
+)
+target_link_options(skiff_pull_list PRIVATE
+  -Wl,-platform_version,macos,11.0,11.0
+  -fPIC
+  -fPIC
+  # SHELL: keeps the flag and its argument paired when CMake de-duplicates link options.
+  "SHELL:-framework CoreFoundation"
+)
+target_sources(skiff_pull_list PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/skiff_pull_list/main.cpp
+)
+target_allocator(skiff_pull_list
+ system_allocator
+)
+vcs_info(skiff_pull_list)
diff --git a/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..596b226847
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,37 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(skiff_pull_list)
+target_compile_options(skiff_pull_list PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(skiff_pull_list PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-purecalc
+ purecalc-io_specs-mkql
+)
+target_link_options(skiff_pull_list PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -fPIC
+ -lpthread
+ -lrt
+ -ldl
+)
+target_sources(skiff_pull_list PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/skiff_pull_list/main.cpp
+)
+target_allocator(skiff_pull_list
+ cpp-malloc-jemalloc
+)
+vcs_info(skiff_pull_list)
diff --git a/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..ebcdf00807
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,39 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(skiff_pull_list)
+target_compile_options(skiff_pull_list PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(skiff_pull_list PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ yql-public-purecalc
+ purecalc-io_specs-mkql
+)
+target_link_options(skiff_pull_list PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -fPIC
+ -lpthread
+ -lrt
+ -ldl
+)
+target_sources(skiff_pull_list PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/skiff_pull_list/main.cpp
+)
+target_allocator(skiff_pull_list
+ cpp-malloc-tcmalloc
+ libs-tcmalloc-no_percpu_cache
+)
+vcs_info(skiff_pull_list)
diff --git a/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.txt b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+# Include the platform-specific fragment matching the target OS/CPU pair. The Linux and Windows branches also require HAVE_CUDA to be unset; there is no else() branch, so any other configuration includes nothing.
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..84d2ebfdcc
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,27 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(skiff_pull_list)
+target_compile_options(skiff_pull_list PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(skiff_pull_list PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ yql-public-purecalc
+ purecalc-io_specs-mkql
+)
+target_sources(skiff_pull_list PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/skiff_pull_list/main.cpp
+)
+target_allocator(skiff_pull_list
+ system_allocator
+)
+vcs_info(skiff_pull_list)
diff --git a/ydb/library/yql/public/purecalc/examples/skiff_pull_list/main.cpp b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/main.cpp
new file mode 100644
index 0000000000..57aa4e0f26
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/main.cpp
@@ -0,0 +1,92 @@
+
+#include <ydb/library/yql/public/purecalc/purecalc.h>
+#include <ydb/library/yql/public/purecalc/io_specs/mkql/spec.h>
+
+#include <ydb/library/yql/core/user_data/yql_user_data.h>
+
+#include <util/stream/file.h>
+#include <util/datetime/base.h>
+#include <library/cpp/yson/node/node.h>
+#include <library/cpp/yson/node/node_io.h>
+
+#include <library/cpp/skiff/skiff.h>
+
+using namespace NYql::NUserData;
+using namespace NYT;
+using namespace NYql::NPureCalc;
+
+// YQL query executed by this example: counts the input rows per Url and
+// returns the groups ordered by hit count, highest first.
+const char* Query = R"(
+ SELECT
+ Url,
+ COUNT(*) AS Hits
+ FROM
+ Input
+ GROUP BY
+ Url
+ ORDER BY
+ Hits desc
+)";
+
+int main() {
+    // Appends one [name, type] member to `fields`. The type is ["DataType", dataType],
+    // additionally wrapped into ["OptionalType", ...] when `optional` is set.
+    auto appendMember = [](NYT::TNode& fields, const TString& fieldName, const TString& dataType, const bool optional) {
+        NYT::TNode fieldType = NYT::TNode::CreateList();
+        fieldType.Add("DataType");
+        fieldType.Add(dataType);
+
+        if (optional) {
+            NYT::TNode wrapped = NYT::TNode::CreateList();
+            wrapped.Add("OptionalType");
+            wrapped.Add(fieldType);
+            fieldType = wrapped;
+        }
+
+        NYT::TNode member = NYT::TNode::CreateList();
+        member.Add(fieldName);
+        member.Add(fieldType);
+        fields.Add(member);
+    };
+
+    // Input schema: a struct with a single non-optional String column "Url".
+    NYT::TNode structMembers{NYT::TNode::CreateList()};
+    appendMember(structMembers, "Url", "String", false);
+    NYT::TNode inputSchema = NYT::TNode::CreateList()
+        .Add("StructType")
+        .Add(structMembers);
+
+    Cout << "InputSchema: " << NodeToYsonString(inputSchema) << Endl;
+    auto inputSpec = TSkiffInputSpec(TVector<NYT::TNode>{inputSchema});
+    auto outputSpec = TSkiffOutputSpec({NYT::TNode::CreateEntity()});
+    auto factoryOptions = TProgramFactoryOptions();
+    factoryOptions.SetNativeYtTypeFlags(0);
+    factoryOptions.SetLLVMSettings("OFF");
+    auto factory = MakeProgramFactory(factoryOptions);
+    auto program = factory->MakePullListProgram(
+        inputSpec,
+        outputSpec,
+        Query,
+        ETranslationMode::SQL);
+    Cout << "OutpSchema: " << NYT::NodeToCanonicalYsonString(program->MakeFullOutputSchema()) << Endl;
+
+    // Serialize the sample rows: each row is written as variant tag 0 followed by the Url string.
+    const char* const visitedUrls[] = {
+        "https://yandex.ru/a",
+        "https://yandex.ru/a",
+        "https://yandex.ru/b",
+        "https://yandex.ru/c",
+        "https://yandex.ru/b",
+        "https://yandex.ru/b",
+    };
+    TStringStream rawRows;
+    NSkiff::TUncheckedSkiffWriter skiffWriter{&rawRows};
+    for (const char* visited : visitedUrls) {
+        skiffWriter.WriteVariant16Tag(0);
+        skiffWriter.WriteString32(visited);
+    }
+    skiffWriter.Finish();
+
+    // Run the program over a copy of the serialized rows and collect its output.
+    TStringStream input(rawRows);
+    auto handle = program->Apply(&input);
+    TStringStream output;
+    handle->Run(&output);
+
+    // Each result row is read back as: variant tag, Hits, Url.
+    auto resultParser = NSkiff::TUncheckedSkiffParser(&output);
+    while (resultParser.HasMoreData()) {
+        resultParser.ParseVariant16Tag();
+        const auto hitCount = resultParser.ParseInt64();
+        const auto pageUrl = resultParser.ParseString32();
+        Cout << "URL: " << pageUrl << " Hits: " << hitCount << Endl;
+    }
+}
diff --git a/ydb/library/yql/public/purecalc/examples/skiff_pull_list/ya.make b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/ya.make
new file mode 100644
index 0000000000..0966d670fe
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/ya.make
@@ -0,0 +1,14 @@
+PROGRAM()
+
+SRCS(
+ main.cpp
+)
+
+PEERDIR(
+ ydb/library/yql/public/purecalc
+ ydb/library/yql/public/purecalc/io_specs/mkql
+)
+
+YQL_LAST_ABI_VERSION()
+
+END()
diff --git a/ydb/library/yql/public/purecalc/examples/ya.make b/ydb/library/yql/public/purecalc/examples/ya.make
new file mode 100644
index 0000000000..ad5853e9c4
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/examples/ya.make
@@ -0,0 +1,7 @@
+RECURSE(
+ protobuf
+ protobuf/ut
+ protobuf_pull_list
+ protobuf_pull_list/ut
+ skiff_pull_list
+)
diff --git a/ydb/library/yql/public/purecalc/helpers/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/helpers/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..f83ed3b540
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/helpers/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,18 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(protobuf)
+add_subdirectory(stream)
+
+add_library(public-purecalc-helpers INTERFACE)
+target_link_libraries(public-purecalc-helpers INTERFACE
+ contrib-libs-cxxsupp
+ yutil
+ purecalc-helpers-protobuf
+ purecalc-helpers-stream
+)
diff --git a/ydb/library/yql/public/purecalc/helpers/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/helpers/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..a28bda905c
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/helpers/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(protobuf)
+add_subdirectory(stream)
+
+add_library(public-purecalc-helpers INTERFACE)
+target_link_libraries(public-purecalc-helpers INTERFACE
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ purecalc-helpers-protobuf
+ purecalc-helpers-stream
+)
diff --git a/ydb/library/yql/public/purecalc/helpers/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/helpers/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..a28bda905c
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/helpers/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(protobuf)
+add_subdirectory(stream)
+
+add_library(public-purecalc-helpers INTERFACE)
+target_link_libraries(public-purecalc-helpers INTERFACE
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ purecalc-helpers-protobuf
+ purecalc-helpers-stream
+)
diff --git a/ydb/library/yql/public/purecalc/helpers/CMakeLists.txt b/ydb/library/yql/public/purecalc/helpers/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/helpers/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/ydb/library/yql/public/purecalc/helpers/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/helpers/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..f83ed3b540
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/helpers/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,18 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(protobuf)
+add_subdirectory(stream)
+
+add_library(public-purecalc-helpers INTERFACE)
+target_link_libraries(public-purecalc-helpers INTERFACE
+ contrib-libs-cxxsupp
+ yutil
+ purecalc-helpers-protobuf
+ purecalc-helpers-stream
+)
diff --git a/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..0f473a2304
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(purecalc-helpers-protobuf)
+target_link_libraries(purecalc-helpers-protobuf PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+ cpp-yson-node
+ yt_proto-yt-formats
+)
+target_sources(purecalc-helpers-protobuf PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..54e7e527ae
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,21 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(purecalc-helpers-protobuf)
+target_link_libraries(purecalc-helpers-protobuf PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+ cpp-yson-node
+ yt_proto-yt-formats
+)
+target_sources(purecalc-helpers-protobuf PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..54e7e527ae
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,21 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(purecalc-helpers-protobuf)
+target_link_libraries(purecalc-helpers-protobuf PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+ cpp-yson-node
+ yt_proto-yt-formats
+)
+target_sources(purecalc-helpers-protobuf PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.txt b/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..0f473a2304
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(purecalc-helpers-protobuf)
+target_link_libraries(purecalc-helpers-protobuf PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+ cpp-yson-node
+ yt_proto-yt-formats
+)
+target_sources(purecalc-helpers-protobuf PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.cpp b/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.cpp
new file mode 100644
index 0000000000..6927c46240
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.cpp
@@ -0,0 +1,202 @@
+#include "schema_from_proto.h"
+
+#include <yt/yt_proto/yt/formats/extension.pb.h>
+
+#include <util/generic/algorithm.h>
+#include <util/generic/string.h>
+#include <util/string/printf.h>
+#include <util/string/vector.h>
+
+namespace pb = google::protobuf;
+
+namespace NYql {
+ namespace NPureCalc {
+
+ // Defaults: enums are represented as Int32, repeated fields are not wrapped into
+ // an optional, and no field renames are configured.
+ TProtoSchemaOptions::TProtoSchemaOptions()
+ : EnumPolicy(EEnumPolicy::Int32)
+ , ListIsOptional(false)
+ {
+ }
+
+ // Stores the enum policy; returns *this so that setters can be chained.
+ TProtoSchemaOptions& TProtoSchemaOptions::SetEnumPolicy(EEnumPolicy policy) {
+ EnumPolicy = policy;
+ return *this;
+ }
+
+ // Stores the ListIsOptional flag; returns *this so that setters can be chained.
+ TProtoSchemaOptions& TProtoSchemaOptions::SetListIsOptional(bool value) {
+ ListIsOptional = value;
+ return *this;
+ }
+
+ // Replaces the whole rename map (the argument is taken by value and moved in);
+ // returns *this so that setters can be chained.
+ TProtoSchemaOptions& TProtoSchemaOptions::SetFieldRenames(
+ THashMap<TString, TString> fieldRenames
+ ) {
+ FieldRenames = std::move(fieldRenames);
+ return *this;
+ }
+
+        namespace {
+            // Picks the enum representation requested by the field's own NYT `flags`
+            // extension. The first ENUM_INT / ENUM_STRING flag encountered wins; when the
+            // field carries neither, `defaultEnumFormatType` is returned.
+            EEnumFormatType EnumFormatTypeWithYTFlag(const pb::FieldDescriptor& enumField, EEnumFormatType defaultEnumFormatType) {
+                const auto fieldFlags = enumField.options().GetRepeatedExtension(NYT::flags);
+                for (const auto fieldFlag : fieldFlags) {
+                    if (fieldFlag == NYT::EWrapperFieldFlag::ENUM_INT) {
+                        return EEnumFormatType::Int32;
+                    }
+                    if (fieldFlag == NYT::EWrapperFieldFlag::ENUM_STRING) {
+                        return EEnumFormatType::String;
+                    }
+                }
+                return defaultEnumFormatType;
+            }
+        }
+
+        // Resolves how the enum field `enumField` is represented under `enumPolicy`.
+        //
+        // Int32 and String force the corresponding representation. The YTFlagDefault*
+        // policies consult the field's NYT flags first and use Int32 / String
+        // respectively when the field does not specify one.
+        //
+        // Throws yexception if `enumPolicy` is not one of the EEnumPolicy enumerators.
+        EEnumFormatType EnumFormatType(const pb::FieldDescriptor& enumField, EEnumPolicy enumPolicy) {
+            switch (enumPolicy) {
+                case EEnumPolicy::Int32:
+                    return EEnumFormatType::Int32;
+                case EEnumPolicy::String:
+                    return EEnumFormatType::String;
+                case EEnumPolicy::YTFlagDefaultInt32:
+                    return EnumFormatTypeWithYTFlag(enumField, EEnumFormatType::Int32);
+                case EEnumPolicy::YTFlagDefaultString:
+                    return EnumFormatTypeWithYTFlag(enumField, EEnumFormatType::String);
+            }
+            // Every enumerator returns above, so this is reached only for a value outside
+            // EEnumPolicy (e.g. produced by a cast). Flowing off the end of a non-void
+            // function is undefined behavior, so report the bad value instead.
+            ythrow yexception() << "Unknown enum policy: " << static_cast<int>(enumPolicy);
+        }
+
+        namespace {
+            // Maps a scalar protobuf field to the name of the data type used in the schema.
+            // Enum fields map to "Int32" or "String" according to `enumPolicy`; any field
+            // type without a case below is rejected with yexception.
+            const char* FormatTypeName(const pb::FieldDescriptor* field, EEnumPolicy enumPolicy) {
+                using TField = pb::FieldDescriptor;
+
+                switch (field->type()) {
+                    case TField::TYPE_DOUBLE:
+                        return "Double";
+                    case TField::TYPE_FLOAT:
+                        return "Float";
+                    case TField::TYPE_INT64:
+                    case TField::TYPE_SFIXED64:
+                    case TField::TYPE_SINT64:
+                        return "Int64";
+                    case TField::TYPE_UINT64:
+                    case TField::TYPE_FIXED64:
+                        return "Uint64";
+                    case TField::TYPE_INT32:
+                    case TField::TYPE_SFIXED32:
+                    case TField::TYPE_SINT32:
+                        return "Int32";
+                    case TField::TYPE_UINT32:
+                    case TField::TYPE_FIXED32:
+                        return "Uint32";
+                    case TField::TYPE_BOOL:
+                        return "Bool";
+                    case TField::TYPE_STRING:
+                        return "Utf8";
+                    case TField::TYPE_BYTES:
+                        return "String";
+                    case TField::TYPE_ENUM:
+                        switch (EnumFormatType(*field, enumPolicy)) {
+                            case EEnumFormatType::Int32:
+                                return "Int32";
+                            case EEnumFormatType::String:
+                                return "String";
+                        }
+                        // Both EEnumFormatType values return above; for any other value
+                        // control continues into the default branch below.
+                    default:
+                        ythrow yexception() << "Unsupported protobuf type: " << field->type_name()
+                                            << ", field: " << field->name() << ", " << int(field->type());
+                }
+            }
+        }
+
+        // Recursively converts `descriptor` into a ["StructType", [[name, type], ...]] node.
+        //
+        // `nested` holds the chain of messages currently being expanded. It is used to
+        // reject recursive message definitions and to tell top-level fields (the only
+        // ones affected by options.FieldRenames) from fields of nested messages. The
+        // descriptor is pushed on entry and popped again before a successful return.
+        NYT::TNode MakeSchemaFromProto(const pb::Descriptor& descriptor, TVector<const pb::Descriptor*>& nested, const TProtoSchemaOptions& options) {
+            const bool alreadyOnPath = Find(nested, &descriptor) != nested.end();
+            if (alreadyOnPath) {
+                // Report the whole chain that leads back to this message.
+                TVector<TString> path;
+                for (const auto* ancestor : nested) {
+                    path.push_back(ancestor->full_name());
+                }
+                path.push_back(descriptor.full_name());
+                ythrow yexception() << Sprintf("recursive messages are not supported (%s)",
+                                               JoinStrings(path, "->").c_str());
+            }
+            nested.push_back(&descriptor);
+
+            // Builds the two-element list [tag, inner], e.g. ["OptionalType", inner].
+            const auto wrapType = [](const char* tag, NYT::TNode inner) {
+                auto wrapper = NYT::TNode::CreateList();
+                wrapper.Add(tag);
+                wrapper.Add(std::move(inner));
+                return wrapper;
+            };
+
+            auto members = NYT::TNode::CreateList();
+            const int fieldCount = descriptor.field_count();
+            for (int idx = 0; idx < fieldCount; ++idx) {
+                const auto& field = *descriptor.field(idx);
+
+                // Renames are applied to fields of the top-level message only.
+                auto fieldName = field.name();
+                if (nested.size() == 1) {
+                    if (const auto* renamed = options.FieldRenames.FindPtr(fieldName)) {
+                        fieldName = *renamed;
+                    }
+                }
+
+                NYT::TNode fieldType;
+                if (field.type() == pb::FieldDescriptor::TYPE_MESSAGE) {
+                    fieldType = MakeSchemaFromProto(*field.message_type(), nested, options);
+                } else {
+                    fieldType = wrapType("DataType", FormatTypeName(&field, options.EnumPolicy));
+                }
+
+                switch (field.label()) {
+                    case pb::FieldDescriptor::LABEL_REQUIRED:
+                        break;
+                    case pb::FieldDescriptor::LABEL_OPTIONAL:
+                        fieldType = wrapType("OptionalType", std::move(fieldType));
+                        break;
+                    case pb::FieldDescriptor::LABEL_REPEATED:
+                        fieldType = wrapType("ListType", std::move(fieldType));
+                        if (options.ListIsOptional) {
+                            fieldType = wrapType("OptionalType", std::move(fieldType));
+                        }
+                        break;
+                    default:
+                        ythrow yexception() << "Unknown protobuf label: " << (ui32)field.label() << ", field: " << fieldName;
+                }
+
+                auto member = NYT::TNode::CreateList();
+                member.Add(fieldName);
+                member.Add(std::move(fieldType));
+                members.Add(std::move(member));
+            }
+
+            nested.pop_back();
+            return wrapType("StructType", std::move(members));
+        }
+
+ // Public entry point: builds the struct schema for `descriptor`, starting the
+ // recursive conversion with an empty chain of enclosing messages.
+ NYT::TNode MakeSchemaFromProto(const pb::Descriptor& descriptor, const TProtoSchemaOptions& options) {
+ TVector<const pb::Descriptor*> nested;
+ return MakeSchemaFromProto(descriptor, nested, options);
+ }
+
+        // Builds ["VariantType", ["TupleType", [schema...]]] with one struct schema per
+        // descriptor, in the order given. Field renames are rejected in this mode.
+        NYT::TNode MakeVariantSchemaFromProtos(const TVector<const pb::Descriptor*>& descriptors, const TProtoSchemaOptions& options) {
+            Y_ENSURE(options.FieldRenames.empty(), "Renames are not supported in variant mode");
+
+            auto alternatives = NYT::TNode::CreateList();
+            for (const pb::Descriptor* proto : descriptors) {
+                alternatives.Add(MakeSchemaFromProto(*proto, options));
+            }
+
+            auto tuple = NYT::TNode::CreateList();
+            tuple.Add("TupleType");
+            tuple.Add(std::move(alternatives));
+
+            auto variant = NYT::TNode::CreateList();
+            variant.Add("VariantType");
+            variant.Add(std::move(tuple));
+            return variant;
+        }
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.h b/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.h
new file mode 100644
index 0000000000..168c654ac7
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.h
@@ -0,0 +1,60 @@
+#pragma once
+
+#include <library/cpp/yson/node/node.h>
+
+#include <util/generic/hash.h>
+#include <util/generic/string.h>
+
+#include <google/protobuf/descriptor.h>
+
+
+namespace NYql {
+ namespace NPureCalc {
+ /**
+ * Selects how protobuf enum fields are represented in the generated schema.
+ */
+ enum class EEnumPolicy {
+ // Every enum field is represented as Int32.
+ Int32,
+ // Every enum field is represented as String.
+ String,
+ // Use the field's NYT ENUM_INT / ENUM_STRING flag; Int32 when neither is set.
+ YTFlagDefaultInt32,
+ // Use the field's NYT ENUM_INT / ENUM_STRING flag; String when neither is set.
+ YTFlagDefaultString
+ };
+
+ /**
+ * Representation chosen for a particular enum field (see EnumFormatType below).
+ */
+ enum class EEnumFormatType {
+ Int32,
+ String
+ };
+
+ /**
+ * Options that customize building of struct type from protobuf descriptor.
+ */
+ struct TProtoSchemaOptions {
+ public:
+ // Enum representation policy; the constructor sets it to EEnumPolicy::Int32.
+ EEnumPolicy EnumPolicy;
+ // When true, the list type built for a repeated field is additionally wrapped
+ // into an optional type; the constructor sets it to false.
+ bool ListIsOptional;
+ // Maps a protobuf field name to the name used in the schema. Applied to fields
+ // of the top-level message only; must be empty for MakeVariantSchemaFromProtos.
+ THashMap<TString, TString> FieldRenames;
+
+ public:
+ TProtoSchemaOptions();
+
+ public:
+ // Each setter stores its argument and returns *this, so calls can be chained.
+ TProtoSchemaOptions& SetEnumPolicy(EEnumPolicy);
+
+ TProtoSchemaOptions& SetListIsOptional(bool);
+
+ TProtoSchemaOptions& SetFieldRenames(
+ THashMap<TString, TString> fieldRenames
+ );
+ };
+
+ /**
+ * Resolve the representation of the given enum field under the given policy.
+ */
+ EEnumFormatType EnumFormatType(const google::protobuf::FieldDescriptor& enumField, EEnumPolicy enumPolicy);
+
+ /**
+ * Build struct type from a protobuf descriptor. The returned yson can be loaded into a struct annotation node
+ * using the ParseTypeFromYson function.
+ *
+ * Throws yexception for recursive message definitions and for field types that have no schema mapping.
+ */
+ NYT::TNode MakeSchemaFromProto(const google::protobuf::Descriptor&, const TProtoSchemaOptions& = {});
+
+ /**
+ * Build variant over tuple type from protobuf descriptors.
+ *
+ * Each descriptor contributes one struct type, in the order given. Field renames are not supported here.
+ */
+ NYT::TNode MakeVariantSchemaFromProtos(const TVector<const google::protobuf::Descriptor*>&, const TProtoSchemaOptions& = {});
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/helpers/protobuf/ya.make b/ydb/library/yql/public/purecalc/helpers/protobuf/ya.make
new file mode 100644
index 0000000000..11300baba8
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/helpers/protobuf/ya.make
@@ -0,0 +1,14 @@
+LIBRARY()
+
+SRCS(
+    schema_from_proto.cpp
+)
+
+# Dependencies of schema_from_proto.{h,cpp}: protobuf descriptors, NYT::TNode,
+# and the NYT field-flag extension (yt/yt_proto/yt/formats/extension.pb.h).
+PEERDIR(
+    contrib/libs/protobuf
+    library/cpp/yson/node
+    yt/yt_proto/yt/formats
+)
+
+END()
diff --git a/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..778b96d4ba
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,21 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(purecalc-helpers-stream)
+target_compile_options(purecalc-helpers-stream PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(purecalc-helpers-stream PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ public-purecalc-common
+)
+target_sources(purecalc-helpers-stream PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..8f1f9643b6
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,22 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(purecalc-helpers-stream)
+target_compile_options(purecalc-helpers-stream PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(purecalc-helpers-stream PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ public-purecalc-common
+)
+target_sources(purecalc-helpers-stream PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..8f1f9643b6
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,22 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(purecalc-helpers-stream)
+target_compile_options(purecalc-helpers-stream PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(purecalc-helpers-stream PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ public-purecalc-common
+)
+target_sources(purecalc-helpers-stream PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.txt b/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..778b96d4ba
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,21 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(purecalc-helpers-stream)
+target_compile_options(purecalc-helpers-stream PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(purecalc-helpers-stream PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ public-purecalc-common
+)
+target_sources(purecalc-helpers-stream PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.cpp b/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.cpp
new file mode 100644
index 0000000000..e1aed5d689
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.cpp
@@ -0,0 +1 @@
+#include "stream_from_vector.h"
diff --git a/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.h b/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.h
new file mode 100644
index 0000000000..51d8513332
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include <ydb/library/yql/public/purecalc/common/interface.h>
+
+namespace NYql {
+ namespace NPureCalc {
+ namespace NPrivate {
+            /**
+             * IStream adapter over a vector owned by the stream itself.
+             *
+             * Fetch() hands out pointers to successive elements and nullptr once every
+             * element has been returned; the pointers refer to storage held by this object.
+             */
+            template <typename T>
+            class TVectorStream final: public IStream<T*> {
+            public:
+                explicit TVectorStream(TVector<T> data)
+                    : Items_(std::move(data))
+                {
+                }
+
+                T* Fetch() override {
+                    return Next_ < Items_.size() ? &Items_[Next_++] : nullptr;
+                }
+
+            private:
+                TVector<T> Items_;
+                // Index of the element the next Fetch() call will return.
+                size_t Next_ = 0;
+            };
+ }
+
+ /**
+ * Convert vector into a purecalc stream.
+ *
+ * The vector is taken by value and moved into the returned stream, which yields
+ * a pointer to each element in order and then nullptr.
+ */
+ template <typename T>
+ THolder<IStream<T*>> StreamFromVector(TVector<T> data) {
+ return MakeHolder<NPrivate::TVectorStream<T>>(std::move(data));
+ }
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/helpers/stream/ya.make b/ydb/library/yql/public/purecalc/helpers/stream/ya.make
new file mode 100644
index 0000000000..c96f93b582
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/helpers/stream/ya.make
@@ -0,0 +1,13 @@
+LIBRARY()
+
+SRCS(
+ stream_from_vector.cpp
+)
+
+PEERDIR(
+ ydb/library/yql/public/purecalc/common
+)
+
+YQL_LAST_ABI_VERSION()
+
+END()
diff --git a/ydb/library/yql/public/purecalc/helpers/ya.make b/ydb/library/yql/public/purecalc/helpers/ya.make
new file mode 100644
index 0000000000..b228b159d9
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/helpers/ya.make
@@ -0,0 +1,8 @@
+LIBRARY()
+
+PEERDIR(
+ ydb/library/yql/public/purecalc/helpers/protobuf
+ ydb/library/yql/public/purecalc/helpers/stream
+)
+
+END()
diff --git a/ydb/library/yql/public/purecalc/io_specs/CMakeLists.txt b/ydb/library/yql/public/purecalc/io_specs/CMakeLists.txt
new file mode 100644
index 0000000000..6ce928c0a6
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/CMakeLists.txt
@@ -0,0 +1,11 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(mkql)
+add_subdirectory(protobuf)
+add_subdirectory(protobuf_raw)
diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..a94716ee3e
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,27 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(ut)
+
+add_library(purecalc-io_specs-mkql)
+target_compile_options(purecalc-io_specs-mkql PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(purecalc-io_specs-mkql PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ public-purecalc-common
+ providers-yt-codec
+ providers-yt-common
+ yt-lib-mkql_helpers
+ providers-common-codec
+ common-schema-mkql
+)
+target_sources(purecalc-io_specs-mkql PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/mkql/spec.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..2c6e3a6bee
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,28 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(ut)
+
+add_library(purecalc-io_specs-mkql)
+target_compile_options(purecalc-io_specs-mkql PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(purecalc-io_specs-mkql PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ public-purecalc-common
+ providers-yt-codec
+ providers-yt-common
+ yt-lib-mkql_helpers
+ providers-common-codec
+ common-schema-mkql
+)
+target_sources(purecalc-io_specs-mkql PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/mkql/spec.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..2c6e3a6bee
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,28 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(ut)
+
+add_library(purecalc-io_specs-mkql)
+target_compile_options(purecalc-io_specs-mkql PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(purecalc-io_specs-mkql PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ public-purecalc-common
+ providers-yt-codec
+ providers-yt-common
+ yt-lib-mkql_helpers
+ providers-common-codec
+ common-schema-mkql
+)
+target_sources(purecalc-io_specs-mkql PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/mkql/spec.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.txt b/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..a94716ee3e
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,27 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(ut)
+
+add_library(purecalc-io_specs-mkql)
+target_compile_options(purecalc-io_specs-mkql PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(purecalc-io_specs-mkql PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ public-purecalc-common
+ providers-yt-codec
+ providers-yt-common
+ yt-lib-mkql_helpers
+ providers-common-codec
+ common-schema-mkql
+)
+target_sources(purecalc-io_specs-mkql PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/mkql/spec.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/spec.cpp b/ydb/library/yql/public/purecalc/io_specs/mkql/spec.cpp
new file mode 100644
index 0000000000..043b2ab156
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/mkql/spec.cpp
@@ -0,0 +1,934 @@
+#include "spec.h"
+
+#include <ydb/library/yql/public/purecalc/common/names.h>
+#include <ydb/library/yql/minikql/computation/mkql_custom_list.h>
+#include <ydb/library/yql/providers/yt/codec/yt_codec_io.h>
+#include <ydb/library/yql/providers/yt/lib/mkql_helpers/mkql_helpers.h>
+#include <ydb/library/yql/providers/yt/common/yql_names.h>
+#include <ydb/library/yql/providers/common/codec/yql_codec_type_flags.h>
+#include <ydb/library/yql/providers/common/schema/mkql/yql_mkql_schema.h>
+#include <ydb/library/yql/minikql/mkql_node_cast.h>
+
+#include <library/cpp/yson/node/node_io.h>
+
+#include <util/generic/noncopyable.h>
+#include <util/generic/ptr.h>
+
+
+namespace {
+ const TStringBuf PathColumnShortName = "path";
+
+    // Wraps a single owning pointer into a one-element vector, transferring
+    // ownership of the pointee to the returned vector.
+    template <typename T>
+    inline TVector<THolder<T>> VectorFromHolder(THolder<T> holder) {
+        TVector<THolder<T>> holders;
+        holders.reserve(1);
+        holders.emplace_back(std::move(holder));
+        return holders;
+    }
+
+    // Builds the attribute map for one row spec:
+    //   { YqlRowSpecAttribute: { RowSpecAttrType, RowSpecAttrNativeYtTypeFlags, RowSpecAttrStrictSchema } }
+    // rowType is either an NYT::TNode, which is copied into the spec as is, or a
+    // MiniKQL type, which is converted with NYql::NCommon::TypeToYsonNode.
+    template <typename TRowType>
+    NYT::TNode ComposeRowSpec(const TRowType* rowType, ui64 nativeYtTypeFlags, bool strictSchema) {
+        static_assert(
+            std::is_same_v<TRowType, NYT::TNode> ||
+            std::is_same_v<TRowType, NKikimr::NMiniKQL::TType>);
+
+        NYT::TNode rowSpec = NYT::TNode::CreateMap();
+
+        // The type entry is filled first, then the flags, then strictness.
+        if constexpr (std::is_same_v<TRowType, NYT::TNode>) {
+            rowSpec[NYql::RowSpecAttrType] = *rowType;
+        } else {
+            rowSpec[NYql::RowSpecAttrType] = NYql::NCommon::TypeToYsonNode(rowType);
+        }
+        rowSpec[NYql::RowSpecAttrNativeYtTypeFlags] = nativeYtTypeFlags;
+        rowSpec[NYql::RowSpecAttrStrictSchema] = strictSchema;
+
+        NYT::TNode attributes = NYT::TNode::CreateMap();
+        attributes[NYql::YqlRowSpecAttribute] = std::move(rowSpec);
+        return attributes;
+    }
+
+ // Per-input view over a TMkqlInputSpec, describing input number InputIndex.
+ //
+ // TableNames and InputSchema are references bound to what the spec's getters
+ // return, so a TInputDescription must not outlive the spec it was built from.
+ struct TInputDescription {
+ public:
+ ui32 InputIndex;
+ const TMaybe<TVector<TString>>& TableNames;
+ const NYT::TNode& InputSchema;
+ const bool UseOriginalRowSpec;
+
+ public:
+ // InputIndex is declared before TableNames, so it is already initialized
+ // when the TableNames initializer below reads it.
+ template <bool UseSkiff>
+ TInputDescription(const NYql::NPureCalc::TMkqlInputSpec<UseSkiff>& spec, ui32 inputIndex)
+ : InputIndex(inputIndex)
+ , TableNames(spec.GetTableNames(InputIndex))
+ , InputSchema(spec.GetSchemas().at(inputIndex))
+ , UseOriginalRowSpec(spec.UseOriginalRowSpec())
+ {
+ }
+
+ // True when table names were set for this input.
+ bool UseSystemColumns() const {
+ return TableNames.Defined();
+ }
+
+ // Number of table names set for this input, or 1 when none were set.
+ size_t GetTablesNumber() const {
+ if (TableNames.Defined()) {
+ return TableNames->size();
+ }
+
+ return 1;
+ }
+ };
+
+ // Builds the attribute node that is later passed to TMkqlIOSpecs::Init.
+ //
+ // The result is a map with a YqlIOSpecTables list and, when row specs are
+ // referenced by name, a YqlIOSpecRegistry map holding the named row specs.
+ //
+ // type              - a Variant or a Struct type; any other kind hits Y_UNREACHABLE.
+ // nativeYtTypeFlags - forwarded to ComposeRowSpec.
+ // strictSchema      - forwarded to ComposeRowSpec.
+ // inputDescription  - optional per-input settings; must be null for Variant types.
+ NYT::TNode ComposeYqlAttributesFromSchema(
+ const NKikimr::NMiniKQL::TType* type,
+ ui64 nativeYtTypeFlags,
+ bool strictSchema,
+ const TInputDescription* inputDescription = nullptr)
+ {
+ auto attrs = NYT::TNode::CreateMap();
+ NYT::TNode& tables = attrs[NYql::YqlIOSpecTables];
+
+ switch (type->GetKind()) {
+ case NKikimr::NMiniKQL::TType::EKind::Variant:
+ {
+ YQL_ENSURE(!inputDescription);
+
+ const auto* vtype = AS_TYPE(NKikimr::NMiniKQL::TVariantType, type);
+
+ NYT::TNode& registryNode = attrs[NYql::YqlIOSpecRegistry];
+ // Canonical YSON text of a row spec -> name under which it was registered.
+ THashMap<TString, TString> uniqSpecs;
+
+ for (ui32 i = 0; i < vtype->GetAlternativesCount(); i++) {
+ // Candidate name; it is numbered by the count of distinct specs seen so far.
+ TString refName = TStringBuilder() << "$table" << uniqSpecs.size();
+
+ auto rowSpec = ComposeRowSpec(vtype->GetAlternativeType(i), nativeYtTypeFlags, strictSchema);
+
+ auto res = uniqSpecs.emplace(NYT::NodeToCanonicalYsonString(rowSpec), refName);
+ if (res.second) {
+ // First occurrence of this row spec: register it under the new name.
+ registryNode[refName] = rowSpec;
+ } else {
+ // Identical row spec already registered: reuse its name.
+ refName = res.first->second;
+ }
+ tables.Add(refName);
+ }
+ break;
+ }
+ case NKikimr::NMiniKQL::TType::EKind::Struct:
+ {
+ auto rowSpec = NYT::TNode();
+
+ // Either reuse the schema node supplied with the input spec or derive
+ // the row spec from the MiniKQL type.
+ if (inputDescription && inputDescription->UseOriginalRowSpec) {
+ rowSpec = ComposeRowSpec(&inputDescription->InputSchema, nativeYtTypeFlags, strictSchema);
+ } else {
+ rowSpec = ComposeRowSpec(type, nativeYtTypeFlags, strictSchema);
+ }
+
+ if (inputDescription && inputDescription->UseSystemColumns()) {
+ rowSpec[NYql::YqlSysColumnPrefix] = NYT::TNode().Add(PathColumnShortName);
+ }
+
+ if (inputDescription && inputDescription->GetTablesNumber() > 1) {
+ // Several tables share one row spec: register it once and
+ // reference it by name once per table.
+ TStringBuf refName = "$table0";
+ attrs[NYql::YqlIOSpecRegistry][refName] = std::move(rowSpec);
+ for (ui32 i = 0; i < inputDescription->GetTablesNumber(); ++i) {
+ tables.Add(refName);
+ }
+ } else {
+ // Single table: the row spec is stored inline in the tables list.
+ tables.Add(std::move(rowSpec));
+ }
+ break;
+ }
+ default:
+ Y_UNREACHABLE();
+ }
+
+ return attrs;
+ }
+
+    // Creates a codec context from the worker's type environment, its function
+    // registry and the holder factory of its computation graph.
+    NYql::NCommon::TCodecContext MakeCodecCtx(NYql::NPureCalc::IWorker* worker) {
+        auto* const holderFactory = &worker->GetGraph().GetHolderFactory();
+        return NYql::NCommon::TCodecContext(worker->GetTypeEnvironment(), worker->GetFunctionRegistry(), holderFactory);
+    }
+
+ // Builds the TMkqlIOSpecs for one input of the worker, or for its output.
+ //
+ // worker           - provides the input/output types, native YT type flags and LLVM settings.
+ // codecCtx         - codec context passed to TMkqlIOSpecs::Init.
+ // useSkiff         - when true, SetUseSkiff is called with the worker's LLVM settings.
+ // inputDescription - when non-null, specs are built for that input; otherwise for the output type.
+ // strictSchema     - forwarded to the row spec; when false the input is expected to carry
+ //                    the YqlOthersColumnName member (checked below).
+ NYql::TMkqlIOSpecs GetIOSpecs(
+ NYql::NPureCalc::IWorker* worker,
+ NYql::NCommon::TCodecContext& codecCtx,
+ bool useSkiff,
+ const TInputDescription* inputDescription = nullptr,
+ bool strictSchema = true
+ ) {
+ NYql::TMkqlIOSpecs specs;
+ if (useSkiff) {
+ specs.SetUseSkiff(worker->GetLLVMSettings());
+ }
+
+ if (inputDescription) {
+ // NOTE(review): the boolean argument presumably selects the input type without (true)
+ // or with (false) the extra system/"others" members; the checks below rely on that.
+ // Confirm against IWorker::GetInputType.
+ const auto* type = worker->GetInputType(inputDescription->InputIndex, true);
+ const auto* fullType = worker->GetInputType(inputDescription->InputIndex, false);
+
+ YQL_ENSURE(!type->FindMemberIndex(NYql::YqlSysColumnPath));
+
+ // Count the members that fullType is expected to have on top of type.
+ size_t extraColumnsCount = 0;
+ if (inputDescription->UseSystemColumns()) {
+ YQL_ENSURE(fullType->FindMemberIndex(NYql::YqlSysColumnPath));
+ ++extraColumnsCount;
+ }
+ if (!strictSchema) {
+ YQL_ENSURE(fullType->FindMemberIndex(NYql::YqlOthersColumnName));
+ ++extraColumnsCount;
+ }
+
+ if (extraColumnsCount != 0) {
+ YQL_ENSURE(fullType->GetMembersCount() == type->GetMembersCount() + extraColumnsCount);
+ } else {
+ // Without extra members both calls must return the very same type object.
+ YQL_ENSURE(type == fullType);
+ }
+
+ auto attrs = ComposeYqlAttributesFromSchema(type, worker->GetNativeYtTypeFlags(), strictSchema, inputDescription);
+ if (inputDescription->TableNames) {
+ specs.Init(codecCtx, attrs, inputDescription->TableNames.GetRef(), {});
+ } else {
+ specs.Init(codecCtx, attrs, {}, {});
+ }
+ } else {
+ // Output side: specs are derived from the worker's output type only.
+ auto attrs = ComposeYqlAttributesFromSchema(worker->GetOutputType(), worker->GetNativeYtTypeFlags(), strictSchema);
+ specs.Init(codecCtx, attrs);
+ }
+
+ return specs;
+ }
+
+ // Adapts an IInputStream to the NYT::TRawTableReader interface.
+ // The stream may be owned (THolder constructor) or borrowed (raw pointer constructor).
+ // Retries are not supported and no range indices are reported.
+ class TRawTableReaderImpl final: public NYT::TRawTableReader {
+ private:
+ // If we own Underlying_, then Owned_ == Underlying_, otherwise Owned_ is nullptr.
+ THolder<IInputStream> Owned_;
+ IInputStream* Underlying_;
+ NKikimr::NMiniKQL::TScopedAlloc& ScopedAlloc_;
+
+ private:
+ // Common constructor both public constructors delegate to.
+ TRawTableReaderImpl(
+ IInputStream* underlying,
+ THolder<IInputStream> owned,
+ NKikimr::NMiniKQL::TScopedAlloc& scopedAlloc
+ )
+ : Owned_(std::move(owned))
+ , Underlying_(underlying)
+ , ScopedAlloc_(scopedAlloc)
+ {
+ }
+
+ public:
+ // Owning constructor. The raw pointer is taken first and ownership is moved in
+ // afterwards in the body, so stream.Get() is never evaluated on a moved-from holder.
+ TRawTableReaderImpl(THolder<IInputStream> stream, NKikimr::NMiniKQL::TScopedAlloc& scopedAlloc)
+ : TRawTableReaderImpl(stream.Get(), nullptr, scopedAlloc)
+ {
+ Owned_ = std::move(stream);
+ }
+
+ // Non-owning constructor: the caller keeps the stream alive.
+ TRawTableReaderImpl(IInputStream* stream, NKikimr::NMiniKQL::TScopedAlloc& scopedAlloc)
+ : TRawTableReaderImpl(stream, nullptr, scopedAlloc)
+ {
+ }
+
+ // Always reports that the read cannot be retried.
+ bool Retry(const TMaybe<ui32>&, const TMaybe<ui64>&) override {
+ return false;
+ }
+
+ // Nothing to reset: retries are not supported.
+ void ResetRetries() override {
+ }
+
+ bool HasRangeIndices() const override {
+ return false;
+ }
+
+ protected:
+ // Reads from the underlying stream.
+ // NOTE(review): Unguard presumably releases the scoped allocator for the duration
+ // of the read and re-acquires it on scope exit - confirm against its definition.
+ size_t DoRead(void* buf, size_t len) override {
+ auto unguard = Unguard(ScopedAlloc_);
+ return Underlying_->Read(buf, len);
+ }
+ };
+
+
+ // Single-pass list/stream value that yields rows decoded by TMkqlReaderImpl from a raw
+ // table reader. The object serves as its own iterator, so it can be iterated only once.
+ //
+ // Member declaration order matters: the constructor's initializers use Worker_ to build
+ // CodecCtx_, CodecCtx_ to build IOSpecs_, and Underlying_ to build Reader_.
+ class TMkqlListValue: public NKikimr::NMiniKQL::TCustomListValue {
+ private:
+ mutable bool HasIterator_ = false;
+ NYql::NPureCalc::IWorker* Worker_;
+ // Keeps the reordering of struct members.
+ NYql::NCommon::TCodecContext CodecCtx_;
+ NYql::TMkqlIOSpecs IOSpecs_;
+ // If we own Underlying_, then Owned_ == Underlying_, otherwise Owned_ is nullptr.
+ THolder<NYT::TRawTableReader> Owned_;
+ NYT::TRawTableReader* Underlying_;
+ NYql::TMkqlReaderImpl Reader_;
+
+ private:
+ // Common constructor both public constructors delegate to.
+ TMkqlListValue(
+ NKikimr::NMiniKQL::TMemoryUsageInfo* memInfo,
+ bool useSkiff,
+ NYT::TRawTableReader* underlying,
+ THolder<NYT::TRawTableReader> owned,
+ NYql::NPureCalc::IWorker* worker,
+ const TInputDescription& inputDescription,
+ bool ignoreStreamTableIndex = false,
+ bool strictSchema = true
+ ) : TCustomListValue(memInfo)
+ , Worker_(worker)
+ , CodecCtx_(MakeCodecCtx(Worker_))
+ , IOSpecs_(GetIOSpecs(Worker_, CodecCtx_, useSkiff, &inputDescription, strictSchema))
+ , Owned_(std::move(owned))
+ , Underlying_(underlying)
+ , Reader_(*Underlying_, 0, 1ul << 20, 0, ignoreStreamTableIndex)
+ {
+ Reader_.SetSpecs(IOSpecs_, Worker_->GetGraph().GetHolderFactory());
+ // Position the reader on the first row; Next() below returns the current row
+ // and then advances.
+ Reader_.Next();
+ }
+
+ public:
+ // Owning constructor. The raw pointer is taken first and ownership is moved in
+ // afterwards in the body, so stream.Get() is never evaluated on a moved-from holder.
+ TMkqlListValue(
+ NKikimr::NMiniKQL::TMemoryUsageInfo* memInfo,
+ bool useSkiff,
+ THolder<NYT::TRawTableReader> stream,
+ NYql::NPureCalc::IWorker* worker,
+ const TInputDescription& inputDescription,
+ bool ignoreStreamTableIndex = false,
+ bool strictSchema = true
+ )
+ : TMkqlListValue(
+ memInfo, useSkiff, stream.Get(), nullptr, worker, inputDescription, ignoreStreamTableIndex, strictSchema)
+ {
+ Owned_ = std::move(stream);
+ }
+
+ // Non-owning constructor: the caller keeps the reader alive.
+ TMkqlListValue(
+ NKikimr::NMiniKQL::TMemoryUsageInfo* memInfo,
+ bool useSkiff,
+ NYT::TRawTableReader* stream,
+ NYql::NPureCalc::IWorker* worker,
+ const TInputDescription& inputDescription,
+ bool ignoreStreamTableIndex,
+ bool strictSchema = true
+ )
+ : TMkqlListValue(memInfo, useSkiff, stream, nullptr, worker, inputDescription, ignoreStreamTableIndex, strictSchema)
+ {
+ }
+
+ // Returns this object as the iterator; a second call fails the ensure below.
+ NKikimr::NUdf::TUnboxedValue GetListIterator() const override {
+ YQL_ENSURE(!HasIterator_, "Only one pass over input is supported");
+ HasIterator_ = true;
+ return NKikimr::NUdf::TUnboxedValuePod(const_cast<TMkqlListValue*>(this));
+ }
+
+ // Stores the current row into result and advances the reader.
+ // Returns false once the reader is no longer valid.
+ bool Next(NKikimr::NUdf::TUnboxedValue& result) override {
+ if (!Reader_.IsValid()) {
+ return false;
+ }
+
+ result = Reader_.GetRow();
+ Reader_.Next();
+
+ return true;
+ }
+
+ // Stream-style access on top of Next(): returns Ok while rows remain, Finish afterwards.
+ NKikimr::NUdf::EFetchStatus Fetch(
+ NKikimr::NUdf::TUnboxedValue& result
+ ) override {
+ if (Next(result)) {
+ return NKikimr::NUdf::EFetchStatus::Ok;
+ }
+
+ return NKikimr::NUdf::EFetchStatus::Finish;
+ }
+ };
+
+    // Base class for output handles: validates that the requested Run() overload
+    // matches the shape of the program's output type and forwards the streams,
+    // ordered by alternative index, to DoRun().
+    class TMkqlWriter: public NYql::NPureCalc::THandle {
+    protected:
+        // Worker whose output type is checked by the Run() overloads.
+        virtual const NYql::NPureCalc::IWorker* GetWorker() const = 0;
+        // Runs the program and writes the i-th output alternative to stream[i].
+        virtual void DoRun(const TVector<IOutputStream*>& stream) = 0;
+
+    public:
+        // Single-output programs: the output type must be a struct.
+        void Run(IOutputStream* stream) final {
+            Y_ENSURE(
+                GetWorker()->GetOutputType()->IsStruct(),
+                "NYql::NPureCalc::THandle::Run(IOutputStream*) cannot be used with multi-output programs; "
+                "use other overloads of Run() instead.");
+
+            DoRun({stream});
+        }
+
+        // Multi-output programs whose output is a variant over a tuple:
+        // one stream per tuple element, in element order.
+        void Run(const TVector<IOutputStream*>& streams) final {
+            Y_ENSURE(
+                GetWorker()->GetOutputType()->IsVariant(),
+                "NYql::NPureCalc::THandle::Run(TVector<IOutputStream*>) cannot be used with single-output programs; "
+                "use NYql::NPureCalc::THandle::Run(IOutputStream*) instead.");
+
+            const auto* variantType = AS_TYPE(NKikimr::NMiniKQL::TVariantType, GetWorker()->GetOutputType());
+
+            Y_ENSURE(
+                variantType->GetUnderlyingType()->IsTuple(),
+                "NYql::NPureCalc::THandle::Run(TVector<IOutputStream*>) cannot be used to process variants over struct; "
+                "use NYql::NPureCalc::THandle::Run(TMap<TString, IOutputStream*>) instead.");
+
+            const auto* tupleType = AS_TYPE(NKikimr::NMiniKQL::TTupleType, variantType->GetUnderlyingType());
+
+            Y_ENSURE(
+                tupleType->GetElementsCount() == streams.size(),
+                "Number of variant alternatives should match number of streams.");
+
+            DoRun(streams);
+        }
+
+        // Multi-output programs whose output is a variant over a struct:
+        // streams are matched to alternatives by member name.
+        void Run(const TMap<TString, IOutputStream*>& streams) final {
+            Y_ENSURE(
+                GetWorker()->GetOutputType()->IsVariant(),
+                "NYql::NPureCalc::THandle::Run(TMap<TString, IOutputStream*>) cannot be used with single-output programs; "
+                "use NYql::NPureCalc::THandle::Run(IOutputStream*) instead.");
+
+            const auto* variantType = AS_TYPE(NKikimr::NMiniKQL::TVariantType, GetWorker()->GetOutputType());
+
+            Y_ENSURE(
+                variantType->GetUnderlyingType()->IsStruct(),
+                "NYql::NPureCalc::THandle::Run(TMap<TString, IOutputStream*>) cannot be used to process variants over tuple; "
+                "use NYql::NPureCalc::THandle::Run(TVector<IOutputStream*>) instead.");
+
+            const auto* structType = AS_TYPE(NKikimr::NMiniKQL::TStructType, variantType->GetUnderlyingType());
+
+            Y_ENSURE(
+                structType->GetMembersCount() == streams.size(),
+                "Number of variant alternatives should match number of streams.");
+
+            // Reorder the streams so that their positions follow the struct member indices.
+            TVector<IOutputStream*> sortedStreams;
+            sortedStreams.reserve(structType->GetMembersCount());
+
+            for (ui32 i = 0; i < structType->GetMembersCount(); i++) {
+                auto name = TString{structType->GetMemberName(i)};
+                // One lookup serves both the presence check and the value access.
+                const auto found = streams.find(name);
+                Y_ENSURE(found != streams.end(), "Cannot find stream for alternative " << name.Quote());
+                sortedStreams.push_back(found->second);
+            }
+
+            DoRun(sortedStreams);
+        }
+    };
+
+    // Output handle for pull-list workers: drains the worker's output iterator
+    // and writes every row through TMkqlWriterImpl.
+    class TPullListMkqlWriter: public TMkqlWriter {
+    private:
+        // Declaration order matters: CodecCtx_ is built from Worker_, IOSpecs_ from both.
+        NYql::NPureCalc::TWorkerHolder<NYql::NPureCalc::IPullListWorker> Worker_;
+        NYql::NCommon::TCodecContext CodecCtx_;
+        NYql::TMkqlIOSpecs IOSpecs_;
+
+    public:
+        TPullListMkqlWriter(
+            NYql::NPureCalc::TWorkerHolder<NYql::NPureCalc::IPullListWorker> worker,
+            bool useSkiff
+        )
+            : Worker_(std::move(worker))
+            , CodecCtx_(MakeCodecCtx(Worker_.Get()))
+            , IOSpecs_(GetIOSpecs(Worker_.Get(), CodecCtx_, useSkiff))
+        {
+        }
+
+    protected:
+        const NYql::NPureCalc::IWorker* GetWorker() const override {
+            return Worker_.Get();
+        }
+
+        // Writes all rows produced by the worker to the given outputs.
+        void DoRun(const TVector<IOutputStream*>& outputs) override {
+            NKikimr::NMiniKQL::TBindTerminator bind(Worker_->GetGraph().GetTerminator());
+
+            with_lock(Worker_->GetScopedAlloc()) {
+                NYql::TMkqlWriterImpl writer{outputs, 0, 1ul << 20};
+                writer.SetSpecs(IOSpecs_);
+
+                const auto rows = Worker_->GetOutputIterator();
+
+                {
+                    // The row value is released before the writer is finished.
+                    NKikimr::NUdf::TUnboxedValue row;
+                    while (rows.Next(row)) {
+                        writer.AddRow(row);
+                    }
+                }
+
+                writer.Finish();
+            }
+        }
+    };
+
+    // Output handle for pull-stream workers: fetches rows from the worker's output
+    // stream until it finishes and writes every row through TMkqlWriterImpl.
+    class TPullStreamMkqlWriter: public TMkqlWriter {
+    private:
+        // Declaration order matters: CodecCtx_ is built from Worker_, IOSpecs_ from both.
+        NYql::NPureCalc::TWorkerHolder<NYql::NPureCalc::IPullStreamWorker> Worker_;
+        NYql::NCommon::TCodecContext CodecCtx_;
+        NYql::TMkqlIOSpecs IOSpecs_;
+
+    public:
+        TPullStreamMkqlWriter(
+            NYql::NPureCalc::TWorkerHolder<NYql::NPureCalc::IPullStreamWorker> worker,
+            bool useSkiff
+        )
+            : Worker_(std::move(worker))
+            , CodecCtx_(MakeCodecCtx(Worker_.Get()))
+            , IOSpecs_(GetIOSpecs(Worker_.Get(), CodecCtx_, useSkiff))
+        {
+        }
+
+    protected:
+        const NYql::NPureCalc::IWorker* GetWorker() const override {
+            return Worker_.Get();
+        }
+
+        // Writes all rows produced by the worker to the given outputs.
+        // Any fetch status other than Ok or Finish is rejected.
+        void DoRun(const TVector<IOutputStream*>& outputs) override {
+            NKikimr::NMiniKQL::TBindTerminator bind(Worker_->GetGraph().GetTerminator());
+
+            with_lock(Worker_->GetScopedAlloc()) {
+                NYql::TMkqlWriterImpl writer{outputs, 0, 1ul << 20};
+                writer.SetSpecs(IOSpecs_);
+
+                const auto output = Worker_->GetOutput();
+
+                {
+                    // The row value is released before the writer is finished.
+                    NKikimr::NUdf::TUnboxedValue row;
+                    bool finished = false;
+                    while (!finished) {
+                        switch (output.Fetch(row)) {
+                            case NKikimr::NUdf::EFetchStatus::Ok:
+                                writer.AddRow(row);
+                                break;
+                            case NKikimr::NUdf::EFetchStatus::Finish:
+                                finished = true;
+                                break;
+                            default:
+                                YQL_ENSURE(false, "Yield is not supported in pull mode");
+                        }
+                    }
+                }
+
+                writer.Finish();
+            }
+        }
+    };
+}
+
+namespace NYql {
+ namespace NPureCalc {
+        // Multi-input constructor: one schema per input. Every input starts with
+        // no table names and an empty set of virtual columns.
+        template <bool UseSkiff>
+        TMkqlInputSpec<UseSkiff>::TMkqlInputSpec(TVector<NYT::TNode> schemas)
+            : Schemas_(std::move(schemas))
+        {
+            const size_t inputsCount = Schemas_.size();
+            AllTableNames_ = TVector<TMaybe<TVector<TString>>>(inputsCount, Nothing());
+            this->AllVirtualColumns_ = TVector<THashMap<TString, NYT::TNode>>(inputsCount);
+        }
+
+        // Single-input constructor: registers one schema with no table names and
+        // an empty set of virtual columns.
+        template <bool UseSkiff>
+        TMkqlInputSpec<UseSkiff>::TMkqlInputSpec(NYT::TNode schema, bool ignoreStreamTableIndex)
+        {
+            IgnoreStreamTableIndex_ = ignoreStreamTableIndex;
+            Schemas_.emplace_back(std::move(schema));
+            // A default-constructed TMaybe is empty, i.e. equivalent to Nothing().
+            AllTableNames_.emplace_back();
+            this->AllVirtualColumns_.emplace_back();
+        }
+
+ template <bool UseSkiff>
+ const TVector<NYT::TNode>& TMkqlInputSpec<UseSkiff>::GetSchemas() const {
+ return Schemas_;
+ }
+
+ template <bool UseSkiff>
+ bool TMkqlInputSpec<UseSkiff>::IgnoreStreamTableIndex() const {
+ return IgnoreStreamTableIndex_;
+ }
+
+ template <bool UseSkiff>
+ bool TMkqlInputSpec<UseSkiff>::IsStrictSchema() const {
+ return StrictSchema_;
+ }
+
+        // Switches strict-schema mode. In non-strict mode every input gets the
+        // virtual YqlOthersColumnName column of type Dict<String, String>;
+        // in strict mode that column is removed again.
+        template <bool UseSkiff>
+        TMkqlInputSpec<UseSkiff>& TMkqlInputSpec<UseSkiff>::SetStrictSchema(bool strictSchema) {
+            static const NYT::TNode stringType = NYT::TNode::CreateList().Add("DataType").Add("String");
+            static const NYT::TNode othersColumntype = NYT::TNode::CreateList().Add("DictType").Add(stringType).Add(stringType);
+
+            StrictSchema_ = strictSchema;
+
+            const size_t inputsCount = Schemas_.size();
+            if (StrictSchema_) {
+                for (size_t input = 0; input < inputsCount; ++input) {
+                    this->AllVirtualColumns_.at(input).erase(NYql::YqlOthersColumnName);
+                }
+            } else {
+                for (size_t input = 0; input < inputsCount; ++input) {
+                    this->AllVirtualColumns_.at(input).emplace(NYql::YqlOthersColumnName, othersColumntype);
+                }
+            }
+
+            return *this;
+        }
+
+ template <bool UseSkiff>
+ bool TMkqlInputSpec<UseSkiff>::UseOriginalRowSpec() const {
+ return UseOriginalRowSpec_;
+ }
+
+ template <bool UseSkiff>
+ TMkqlInputSpec<UseSkiff>& TMkqlInputSpec<UseSkiff>::SetUseOriginalRowSpec(bool value) {
+ UseOriginalRowSpec_ = value;
+
+ return *this;
+ }
+
+ template <bool UseSkiff>
+ const TMaybe<TVector<TString>>& TMkqlInputSpec<UseSkiff>::GetTableNames() const {
+ Y_ENSURE(AllTableNames_.size() == 1, "expected single-input spec");
+
+ return AllTableNames_[0];
+ }
+
+ template <bool UseSkiff>
+ const TMaybe<TVector<TString>>& TMkqlInputSpec<UseSkiff>::GetTableNames(ui32 index) const {
+ Y_ENSURE(index < AllTableNames_.size(), "invalid input index");
+
+ return AllTableNames_[index];
+ }
+
+ template <bool UseSkiff>
+ TMkqlInputSpec<UseSkiff>& TMkqlInputSpec<UseSkiff>::SetTableNames(TVector<TString> tableNames) {
+ Y_ENSURE(AllTableNames_.size() == 1, "expected single-input spec");
+
+ return SetTableNames(std::move(tableNames), 0);
+ }
+
+ // Sets the table names for the input with the given index.
+ // The first time names are set for an input, the virtual system column
+ // NYql::YqlSysColumnPath (a String) is registered for it as well.
+ // Fails the Y_ENSURE check if index is out of range. Returns *this for chaining.
+ template <bool UseSkiff>
+ TMkqlInputSpec<UseSkiff>& TMkqlInputSpec<UseSkiff>::SetTableNames(TVector<TString> tableNames, ui32 index) {
+ Y_ENSURE(index < AllTableNames_.size(), "invalid input index");
+
+ auto& value = AllTableNames_[index];
+
+ if (!value.Defined()) {
+ // Sanity checks: the YT and purecalc names of the path column must agree,
+ // and the column's type id must be that of TDataType<char*>.
+ YQL_ENSURE(NYql::YqlSysColumnPath == NYql::NPureCalc::PurecalcSysColumnTablePath);
+ YQL_ENSURE(NYql::GetSysColumnTypeId(PathColumnShortName) == NYql::NUdf::TDataType<char*>::Id);
+ this->AllVirtualColumns_.at(index).emplace(
+ NYql::YqlSysColumnPath, NYT::TNode::CreateList().Add("DataType").Add("String")
+ );
+ }
+
+ value = std::move(tableNames);
+
+ return *this;
+ }
+
+ template <bool UseSkiff>
+ TMkqlOutputSpec<UseSkiff>::TMkqlOutputSpec(NYT::TNode schema)
+ : Schema_(std::move(schema))
+ {
+ }
+
+ template <bool UseSkiff>
+ const NYT::TNode& TMkqlOutputSpec<UseSkiff>::GetSchema() const {
+ return Schema_;
+ }
+
+        // Borrowed IInputStream inputs: each stream is wrapped into a non-owning
+        // TRawTableReaderImpl and forwarded to the overload taking owned readers.
+        template <bool UseSkiff>
+        void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
+            const TMkqlInputSpec<UseSkiff>& spec,
+            IPullStreamWorker* worker,
+            const TVector<IInputStream*>& streams
+        ) {
+            YQL_ENSURE(
+                worker->GetInputsCount() == streams.size(),
+                "number of input streams should match number of inputs provided by spec");
+
+            auto& scopedAlloc = worker->GetScopedAlloc();
+
+            TVector<THolder<NYT::TRawTableReader>> readers;
+            readers.reserve(streams.size());
+            for (IInputStream* stream : streams) {
+                readers.push_back(MakeHolder<TRawTableReaderImpl>(stream, scopedAlloc));
+            }
+
+            TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(spec, worker, std::move(readers));
+        }
+
+ template <bool UseSkiff>
+ void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
+ const TMkqlInputSpec<UseSkiff>& spec,
+ IPullStreamWorker* worker,
+ IInputStream* stream
+ ) {
+ TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
+ spec,
+ worker,
+ TVector<IInputStream*>({stream})
+ );
+ }
+
+        // Owned IInputStream inputs: each stream is moved into an owning
+        // TRawTableReaderImpl and forwarded to the overload taking owned readers.
+        template <bool UseSkiff>
+        void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
+            const TMkqlInputSpec<UseSkiff>& spec,
+            IPullStreamWorker* worker,
+            TVector<THolder<IInputStream>>&& streams
+        ) {
+            YQL_ENSURE(
+                worker->GetInputsCount() == streams.size(),
+                "number of input streams should match number of inputs provided by spec");
+
+            auto& scopedAlloc = worker->GetScopedAlloc();
+
+            TVector<THolder<NYT::TRawTableReader>> readers;
+            readers.reserve(streams.size());
+            for (auto& stream : streams) {
+                readers.push_back(MakeHolder<TRawTableReaderImpl>(std::move(stream), scopedAlloc));
+            }
+
+            TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(spec, worker, std::move(readers));
+        }
+
+ template <bool UseSkiff>
+ void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
+ const TMkqlInputSpec<UseSkiff>& spec,
+ IPullStreamWorker* worker,
+ THolder<IInputStream> stream
+ ) {
+ TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
+ spec,
+ worker,
+ VectorFromHolder<IInputStream>(std::move(stream))
+ );
+ }
+
+        // Borrowed raw table readers: creates one TMkqlListValue per reader under the
+        // worker's allocator lock and installs it as the worker input with the same index.
+        template <bool UseSkiff>
+        void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
+            const TMkqlInputSpec<UseSkiff>& spec,
+            IPullStreamWorker* worker,
+            const TVector<NYT::TRawTableReader*>& streams
+        ) {
+            YQL_ENSURE(
+                worker->GetInputsCount() == streams.size(),
+                "number of input streams should match number of inputs provided by spec");
+
+            with_lock(worker->GetScopedAlloc()) {
+                auto& factory = worker->GetGraph().GetHolderFactory();
+
+                ui32 inputIndex = 0;
+                for (NYT::TRawTableReader* reader : streams) {
+                    TInputDescription description(spec, inputIndex);
+                    auto listValue = factory.Create<TMkqlListValue>(
+                        UseSkiff, reader, worker, description, spec.IgnoreStreamTableIndex(), spec.IsStrictSchema()
+                    );
+                    worker->SetInput(std::move(listValue), inputIndex);
+                    ++inputIndex;
+                }
+            }
+        }
+
+ template <bool UseSkiff>
+ void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
+ const TMkqlInputSpec<UseSkiff>& spec,
+ IPullStreamWorker* worker,
+ NYT::TRawTableReader* stream
+ ) {
+ TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
+ spec,
+ worker,
+ TVector<NYT::TRawTableReader*>({stream})
+ );
+ }
+
+        // Owned raw table readers: creates one owning TMkqlListValue per reader under the
+        // worker's allocator lock and installs it as the worker input with the same index.
+        template <bool UseSkiff>
+        void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
+            const TMkqlInputSpec<UseSkiff>& spec,
+            IPullStreamWorker* worker,
+            TVector<THolder<NYT::TRawTableReader>>&& streams
+        ) {
+            YQL_ENSURE(
+                worker->GetInputsCount() == streams.size(),
+                "number of input streams should match number of inputs provided by spec");
+
+            with_lock(worker->GetScopedAlloc()) {
+                auto& factory = worker->GetGraph().GetHolderFactory();
+
+                ui32 inputIndex = 0;
+                for (auto& reader : streams) {
+                    TInputDescription description(spec, inputIndex);
+                    auto listValue = factory.Create<TMkqlListValue>(
+                        UseSkiff, std::move(reader), worker, description, spec.IgnoreStreamTableIndex(), spec.IsStrictSchema()
+                    );
+                    worker->SetInput(std::move(listValue), inputIndex);
+                    ++inputIndex;
+                }
+            }
+        }
+
+ template <bool UseSkiff>
+ void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
+ const TMkqlInputSpec<UseSkiff>& spec,
+ IPullStreamWorker* worker,
+ THolder<NYT::TRawTableReader> stream
+ ) {
+ TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
+ spec,
+ worker,
+ VectorFromHolder<NYT::TRawTableReader>(std::move(stream))
+ );
+ }
+
+        // Borrowed IInputStream inputs: each stream is wrapped into a non-owning
+        // TRawTableReaderImpl and forwarded to the overload taking owned readers.
+        template <bool UseSkiff>
+        void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
+            const TMkqlInputSpec<UseSkiff>& spec,
+            IPullListWorker* worker,
+            const TVector<IInputStream*>& streams
+        ) {
+            YQL_ENSURE(
+                worker->GetInputsCount() == streams.size(),
+                "number of input streams should match number of inputs provided by spec");
+
+            auto& scopedAlloc = worker->GetScopedAlloc();
+
+            TVector<THolder<NYT::TRawTableReader>> readers;
+            readers.reserve(streams.size());
+            for (IInputStream* stream : streams) {
+                readers.push_back(MakeHolder<TRawTableReaderImpl>(stream, scopedAlloc));
+            }
+
+            TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(spec, worker, std::move(readers));
+        }
+
+ template <bool UseSkiff>
+ void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
+ const TMkqlInputSpec<UseSkiff>& spec,
+ IPullListWorker* worker,
+ IInputStream* stream
+ ) {
+ TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
+ spec,
+ worker,
+ TVector<IInputStream*>({stream})
+ );
+ }
+
+        // Owned IInputStream inputs: each stream is moved into an owning
+        // TRawTableReaderImpl and forwarded to the overload taking owned readers.
+        template <bool UseSkiff>
+        void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
+            const TMkqlInputSpec<UseSkiff>& spec,
+            IPullListWorker* worker,
+            TVector<THolder<IInputStream>>&& streams
+        ) {
+            YQL_ENSURE(
+                worker->GetInputsCount() == streams.size(),
+                "number of input streams should match number of inputs provided by spec");
+
+            auto& scopedAlloc = worker->GetScopedAlloc();
+
+            TVector<THolder<NYT::TRawTableReader>> readers;
+            readers.reserve(streams.size());
+            for (auto& stream : streams) {
+                readers.push_back(MakeHolder<TRawTableReaderImpl>(std::move(stream), scopedAlloc));
+            }
+
+            TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(spec, worker, std::move(readers));
+        }
+
+ template <bool UseSkiff>
+ void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
+ const TMkqlInputSpec<UseSkiff>& spec,
+ IPullListWorker* worker,
+ THolder<IInputStream> stream
+ ) {
+ TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
+ spec,
+ worker,
+ VectorFromHolder<IInputStream>(std::move(stream))
+ );
+ }
+
+        // Borrowed raw table readers: creates one TMkqlListValue per reader under the
+        // worker's allocator lock and installs it as the worker input with the same index.
+        template <bool UseSkiff>
+        void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
+            const TMkqlInputSpec<UseSkiff>& spec,
+            IPullListWorker* worker,
+            const TVector<NYT::TRawTableReader*>& streams
+        ) {
+            YQL_ENSURE(
+                worker->GetInputsCount() == streams.size(),
+                "number of input streams should match number of inputs provided by spec");
+
+            with_lock(worker->GetScopedAlloc()) {
+                auto& factory = worker->GetGraph().GetHolderFactory();
+
+                ui32 inputIndex = 0;
+                for (NYT::TRawTableReader* reader : streams) {
+                    TInputDescription description(spec, inputIndex);
+                    auto listValue = factory.Create<TMkqlListValue>(
+                        UseSkiff, reader, worker, description, spec.IgnoreStreamTableIndex(), spec.IsStrictSchema()
+                    );
+                    worker->SetInput(std::move(listValue), inputIndex);
+                    ++inputIndex;
+                }
+            }
+        }
+
+ template <bool UseSkiff>
+ void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
+ const TMkqlInputSpec<UseSkiff>& spec,
+ IPullListWorker* worker,
+ NYT::TRawTableReader* stream
+ ) {
+ TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
+ spec,
+ worker,
+ TVector<NYT::TRawTableReader*>({stream})
+ );
+ }
+
+        // Owned raw table readers: creates one owning TMkqlListValue per reader under the
+        // worker's allocator lock and installs it as the worker input with the same index.
+        template <bool UseSkiff>
+        void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
+            const TMkqlInputSpec<UseSkiff>& spec,
+            IPullListWorker* worker,
+            TVector<THolder<NYT::TRawTableReader>>&& streams
+        ) {
+            YQL_ENSURE(
+                worker->GetInputsCount() == streams.size(),
+                "number of input streams should match number of inputs provided by spec");
+
+            with_lock(worker->GetScopedAlloc()) {
+                auto& factory = worker->GetGraph().GetHolderFactory();
+
+                ui32 inputIndex = 0;
+                for (auto& reader : streams) {
+                    TInputDescription description(spec, inputIndex);
+                    auto listValue = factory.Create<TMkqlListValue>(
+                        UseSkiff, std::move(reader), worker, description, spec.IgnoreStreamTableIndex(), spec.IsStrictSchema()
+                    );
+                    worker->SetInput(std::move(listValue), inputIndex);
+                    ++inputIndex;
+                }
+            }
+        }
+
+ template <bool UseSkiff>
+ void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
+ const TMkqlInputSpec<UseSkiff>& spec,
+ IPullListWorker* worker,
+ THolder<NYT::TRawTableReader> stream
+ ) {
+ TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
+ spec,
+ worker,
+ VectorFromHolder<NYT::TRawTableReader>(std::move(stream))
+ );
+ }
+
+ // Wraps a pull-list worker into an output handle (TPullListMkqlWriter).
+ // The output spec argument is unused. The writer is constructed while the worker's
+ // scoped allocator is locked, because its constructor builds the codec context and
+ // IO specs from the worker.
+ template <bool UseSkiff>
+ THolder<THandle> TOutputSpecTraits<TMkqlOutputSpec<UseSkiff>>::ConvertPullListWorkerToOutputType(
+ const NYql::NPureCalc::TMkqlOutputSpec<UseSkiff>&,
+ NYql::NPureCalc::TWorkerHolder<NYql::NPureCalc::IPullListWorker> worker
+ ) {
+ with_lock(worker->GetScopedAlloc()) {
+ return MakeHolder<TPullListMkqlWriter>(std::move(worker), UseSkiff);
+ }
+ }
+
+ // Wraps a pull-stream worker into an output handle (TPullStreamMkqlWriter).
+ // The output spec argument is unused. The writer is constructed while the worker's
+ // scoped allocator is locked, because its constructor builds the codec context and
+ // IO specs from the worker.
+ template <bool UseSkiff>
+ THolder<THandle> TOutputSpecTraits<TMkqlOutputSpec<UseSkiff>>::ConvertPullStreamWorkerToOutputType(
+ const NYql::NPureCalc::TMkqlOutputSpec<UseSkiff>&,
+ NYql::NPureCalc::TWorkerHolder<NYql::NPureCalc::IPullStreamWorker> worker
+ ) {
+ with_lock(worker->GetScopedAlloc()) {
+ return MakeHolder<TPullStreamMkqlWriter>(std::move(worker), UseSkiff);
+ }
+ }
+
+ template class TMkqlSpec<true, TInputSpecBase>;
+ template class TMkqlSpec<false, TInputSpecBase>;
+ template class TMkqlSpec<true, TOutputSpecBase>;
+ template class TMkqlSpec<false, TOutputSpecBase>;
+
+ template class TMkqlInputSpec<true>;
+ template class TMkqlInputSpec<false>;
+ template class TMkqlOutputSpec<true>;
+ template class TMkqlOutputSpec<false>;
+
+ template struct TInputSpecTraits<TMkqlInputSpec<true>>;
+ template struct TInputSpecTraits<TMkqlInputSpec<false>>;
+ template struct TOutputSpecTraits<TMkqlOutputSpec<true>>;
+ template struct TOutputSpecTraits<TMkqlOutputSpec<false>>;
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/spec.h b/ydb/library/yql/public/purecalc/io_specs/mkql/spec.h
new file mode 100644
index 0000000000..ef4ceea6a2
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/mkql/spec.h
@@ -0,0 +1,231 @@
+#pragma once
+
+#include <ydb/library/yql/public/purecalc/common/interface.h>
+
+#include <util/generic/noncopyable.h>
+
+namespace NYT {
+ class TRawTableReader;
+}
+
+namespace NYql {
+ namespace NPureCalc {
+ /**
+ * Processing mode for working with Skiff/YSON IO.
+ *
+ * In this mode purecalc accepts a vector of pointers to `IInputStream` as its inputs and returns a handle
+ * which can be used to invoke the program, writing all output to a stream.
+ *
+ * For example:
+ *
+ * @code
+ * auto handle = program.Apply(&Cin);
+ * handle->Run(&Cout); // run the program, read from Cin and write to Cout
+ * @endcode
+ *
+ * All working modes except PushStream are supported.
+ */
+ template <bool UseSkiff, typename TBase>
+ class TMkqlSpec: public TBase {
+ static_assert(
+ std::is_same<TBase, TInputSpecBase>::value ||
+ std::is_same<TBase, TOutputSpecBase>::value,
+ "Class is used in unintended way!"
+ );
+ };
+
+ /**
+ * Skiff/YSON input spec. In this mode purecalc takes non-owning pointers to text input streams and parses
+ * them using the Skiff or YSON codec.
+ *
+ * The program synopsis follows:
+ *
+ * @code
+ * ... TPullStreamProgram::Apply(TVector<IInputStream*>);
+ * ... TPullStreamProgram::Apply(TVector<NYT::TRawTableReader*>);
+ * ... TPullListProgram::Apply(TVector<IInputStream*>);
+ * ... TPullListProgram::Apply(TVector<NYT::TRawTableReader*>);
+ * @endcode
+ *
+ * @tparam UseSkiff expect Skiff format if true, YSON otherwise.
+ */
+ template <bool UseSkiff>
+ class TMkqlInputSpec: public TMkqlSpec<UseSkiff, TInputSpecBase> {
+ public:
+ using TBase = TInputSpecBase;
+ static constexpr bool UseSkiffValue = UseSkiff;
+
+ private:
+ TVector<NYT::TNode> Schemas_;
+ bool StrictSchema_ = true;
+ bool IgnoreStreamTableIndex_ = false;
+ TVector<TMaybe<TVector<TString>>> AllTableNames_;
+ // Allows reading structure columns with a custom member order.
+ // Instead of the chain TNode => TTypeAnnotationNode => TType => TNode (which loses member order), use
+ // the original schema as the row spec.
+ bool UseOriginalRowSpec_ = false;
+
+ public:
+ explicit TMkqlInputSpec(TVector<NYT::TNode>);
+ explicit TMkqlInputSpec(NYT::TNode, bool ignoreStreamTableIndex = false);
+
+ const TVector<NYT::TNode>& GetSchemas() const override;
+
+ bool IgnoreStreamTableIndex() const;
+
+ bool IsStrictSchema() const;
+ TMkqlInputSpec& SetStrictSchema(bool strictSchema);
+
+ const TMaybe<TVector<TString>>& GetTableNames() const;
+ const TMaybe<TVector<TString>>& GetTableNames(ui32) const;
+ bool UseOriginalRowSpec() const;
+
+ TMkqlInputSpec& SetTableNames(TVector<TString>);
+ TMkqlInputSpec& SetTableNames(TVector<TString>, ui32);
+ TMkqlInputSpec& SetUseOriginalRowSpec(bool value);
+ };
+
+ /**
+ * Skiff/YSON output. In this mode purecalc returns a handle which can be used to invoke an underlying program.
+ *
+ * So far this is the only spec that supports multi-table output.
+ *
+ * The program synopsis follows:
+ *
+ * @code
+ * THolder<THandle> TPullStreamProgram::Apply(...);
+ * THolder<THandle> TPullListProgram::Apply(...);
+ * @endcode
+ *
+ * @tparam UseSkiff write output in Skiff format if true, use YSON otherwise.
+ */
+ template <bool UseSkiff>
+ class TMkqlOutputSpec: public TMkqlSpec<UseSkiff, TOutputSpecBase> {
+ public:
+ using TMkqlSpec<UseSkiff, TOutputSpecBase>::TMkqlSpec;
+
+ using TBase = TOutputSpecBase;
+ static constexpr bool UseSkiffValue = UseSkiff;
+
+ private:
+ NYT::TNode Schema_;
+
+ public:
+ explicit TMkqlOutputSpec(NYT::TNode);
+
+ const NYT::TNode& GetSchema() const override;
+ };
+
+ /**
+ * A class which can invoke a purecalc program and store its output in the given output stream.
+ */
+ class THandle: private TMoveOnly {
+ public:
+ /**
+ * Run the program. Read a chunk from the program's assigned input, parse it and pass it to the program.
+ * Then serialize the program's output and write it to the given output stream. Repeat until the input
+ * stream is empty.
+ */
+ /// @{
+ /**
+ * Overload for single-table output programs (i.e. output type is struct).
+ */
+ virtual void Run(IOutputStream*) = 0;
+ /**
+ * Overload for multi-table output programs (i.e. output type is variant over tuple).
+ * Size of vector should match number of variant alternatives.
+ */
+ virtual void Run(const TVector<IOutputStream*>&) = 0;
+ /**
+ * Overload for multi-table output programs (i.e. output type is variant over struct).
+ * Size of map should match number of variant alternatives. For every alternative there should be a stream
+ * in the map.
+ */
+ virtual void Run(const TMap<TString, IOutputStream*>&) = 0;
+ /// @}
+
+ virtual ~THandle() = default;
+ };
+
+ template <bool UseSkiff>
+ struct TInputSpecTraits<TMkqlInputSpec<UseSkiff>> {
+ static const constexpr bool IsPartial = false;
+
+ static const constexpr bool SupportPullStreamMode = true;
+ static const constexpr bool SupportPullListMode = true;
+ static const constexpr bool SupportPushStreamMode = false;
+
+ static void PreparePullStreamWorker(
+ const TMkqlInputSpec<UseSkiff>& spec, IPullStreamWorker* worker, const TVector<IInputStream*>& streams);
+
+ static void PreparePullStreamWorker(
+ const TMkqlInputSpec<UseSkiff>& spec, IPullStreamWorker* worker, TVector<THolder<IInputStream>>&& streams);
+
+ static void PreparePullStreamWorker(
+ const TMkqlInputSpec<UseSkiff>& spec, IPullStreamWorker* worker, const TVector<NYT::TRawTableReader*>& streams);
+
+ static void PreparePullStreamWorker(
+ const TMkqlInputSpec<UseSkiff>& spec, IPullStreamWorker* worker, TVector<THolder<NYT::TRawTableReader>>&& streams);
+
+ static void PreparePullListWorker(
+ const TMkqlInputSpec<UseSkiff>& spec, IPullListWorker* worker, const TVector<IInputStream*>& streams);
+
+ static void PreparePullListWorker(
+ const TMkqlInputSpec<UseSkiff>& spec, IPullListWorker* worker, TVector<THolder<IInputStream>>&& streams);
+
+ static void PreparePullListWorker(
+ const TMkqlInputSpec<UseSkiff>& spec, IPullListWorker* worker, const TVector<NYT::TRawTableReader*>& streams);
+
+ static void PreparePullListWorker(
+ const TMkqlInputSpec<UseSkiff>& spec, IPullListWorker* worker, TVector<THolder<NYT::TRawTableReader>>&& streams);
+
+ // Members for single-input programs
+
+ static void PreparePullStreamWorker(
+ const TMkqlInputSpec<UseSkiff>& spec, IPullStreamWorker* worker, IInputStream* stream);
+
+ static void PreparePullStreamWorker(
+ const TMkqlInputSpec<UseSkiff>& spec, IPullStreamWorker* worker, THolder<IInputStream> stream);
+
+ static void PreparePullStreamWorker(
+ const TMkqlInputSpec<UseSkiff>& spec, IPullStreamWorker* worker, NYT::TRawTableReader* stream);
+
+ static void PreparePullStreamWorker(
+ const TMkqlInputSpec<UseSkiff>& spec, IPullStreamWorker* worker, THolder<NYT::TRawTableReader> stream);
+
+ static void PreparePullListWorker(
+ const TMkqlInputSpec<UseSkiff>& spec, IPullListWorker* worker, IInputStream* stream);
+
+ static void PreparePullListWorker(
+ const TMkqlInputSpec<UseSkiff>& spec, IPullListWorker* worker, THolder<IInputStream> stream);
+
+ static void PreparePullListWorker(
+ const TMkqlInputSpec<UseSkiff>& spec, IPullListWorker* worker, NYT::TRawTableReader* stream);
+
+ static void PreparePullListWorker(
+ const TMkqlInputSpec<UseSkiff>& spec, IPullListWorker* worker, THolder<NYT::TRawTableReader> stream);
+ };
+
+ template <bool UseSkiff>
+ struct TOutputSpecTraits<TMkqlOutputSpec<UseSkiff>> {
+ static const constexpr bool IsPartial = false;
+
+ static const constexpr bool SupportPullStreamMode = true;
+ static const constexpr bool SupportPullListMode = true;
+ static const constexpr bool SupportPushStreamMode = false;
+
+ using TPullStreamReturnType = THolder<THandle>;
+ using TPullListReturnType = THolder<THandle>;
+
+ static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const TMkqlOutputSpec<UseSkiff>&, TWorkerHolder<IPullStreamWorker>);
+
+ static TPullListReturnType ConvertPullListWorkerToOutputType(const TMkqlOutputSpec<UseSkiff>&, TWorkerHolder<IPullListWorker>);
+ };
+
+ using TSkiffInputSpec = TMkqlInputSpec<true>;
+ using TSkiffOutputSpec = TMkqlOutputSpec<true>;
+
+ using TYsonInputSpec = TMkqlInputSpec<false>;
+ using TYsonOutputSpec = TMkqlOutputSpec<false>;
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..08edd65bf8
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,77 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(ydb-library-yql-public-purecalc-io_specs-mkql-ut)
+target_compile_options(ydb-library-yql-public-purecalc-io_specs-mkql-ut PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(ydb-library-yql-public-purecalc-io_specs-mkql-ut PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ cpp-testing-unittest_main
+ udf-service-exception_policy
+ public-purecalc-common
+ purecalc-io_specs-mkql
+ purecalc-ut-lib
+)
+target_link_options(ydb-library-yql-public-purecalc-io_specs-mkql-ut PRIVATE
+ -Wl,-platform_version,macos,11.0,11.0
+ -fPIC
+ -fPIC
+ -framework
+ CoreFoundation
+)
+target_sources(ydb-library-yql-public-purecalc-io_specs-mkql-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test_spec.cpp
+)
+set_property(
+ TARGET
+ ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ PROPERTY
+ SPLIT_FACTOR
+ 1
+)
+add_yunittest(
+ NAME
+ ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ TEST_TARGET
+ ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ TEST_ARG
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ PROPERTY
+ LABELS
+ MEDIUM
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ PROPERTY
+ PROCESSORS
+ 1
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ PROPERTY
+ TIMEOUT
+ 300
+)
+target_allocator(ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ system_allocator
+)
+vcs_info(ydb-library-yql-public-purecalc-io_specs-mkql-ut)
diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..5027702669
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,80 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(ydb-library-yql-public-purecalc-io_specs-mkql-ut)
+target_compile_options(ydb-library-yql-public-purecalc-io_specs-mkql-ut PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(ydb-library-yql-public-purecalc-io_specs-mkql-ut PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ cpp-testing-unittest_main
+ udf-service-exception_policy
+ public-purecalc-common
+ purecalc-io_specs-mkql
+ purecalc-ut-lib
+)
+target_link_options(ydb-library-yql-public-purecalc-io_specs-mkql-ut PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -fPIC
+ -lpthread
+ -lrt
+ -ldl
+)
+target_sources(ydb-library-yql-public-purecalc-io_specs-mkql-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test_spec.cpp
+)
+set_property(
+ TARGET
+ ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ PROPERTY
+ SPLIT_FACTOR
+ 1
+)
+add_yunittest(
+ NAME
+ ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ TEST_TARGET
+ ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ TEST_ARG
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ PROPERTY
+ LABELS
+ MEDIUM
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ PROPERTY
+ PROCESSORS
+ 1
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ PROPERTY
+ TIMEOUT
+ 300
+)
+target_allocator(ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ cpp-malloc-jemalloc
+)
+vcs_info(ydb-library-yql-public-purecalc-io_specs-mkql-ut)
diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..36c85aeacb
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,82 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(ydb-library-yql-public-purecalc-io_specs-mkql-ut)
+target_compile_options(ydb-library-yql-public-purecalc-io_specs-mkql-ut PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(ydb-library-yql-public-purecalc-io_specs-mkql-ut PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ cpp-testing-unittest_main
+ udf-service-exception_policy
+ public-purecalc-common
+ purecalc-io_specs-mkql
+ purecalc-ut-lib
+)
+target_link_options(ydb-library-yql-public-purecalc-io_specs-mkql-ut PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -fPIC
+ -lpthread
+ -lrt
+ -ldl
+)
+target_sources(ydb-library-yql-public-purecalc-io_specs-mkql-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test_spec.cpp
+)
+set_property(
+ TARGET
+ ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ PROPERTY
+ SPLIT_FACTOR
+ 1
+)
+add_yunittest(
+ NAME
+ ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ TEST_TARGET
+ ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ TEST_ARG
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ PROPERTY
+ LABELS
+ MEDIUM
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ PROPERTY
+ PROCESSORS
+ 1
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ PROPERTY
+ TIMEOUT
+ 300
+)
+target_allocator(ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ cpp-malloc-tcmalloc
+ libs-tcmalloc-no_percpu_cache
+)
+vcs_info(ydb-library-yql-public-purecalc-io_specs-mkql-ut)
diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.txt b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..be8ef1154c
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,70 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(ydb-library-yql-public-purecalc-io_specs-mkql-ut)
+target_compile_options(ydb-library-yql-public-purecalc-io_specs-mkql-ut PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(ydb-library-yql-public-purecalc-io_specs-mkql-ut PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ cpp-testing-unittest_main
+ udf-service-exception_policy
+ public-purecalc-common
+ purecalc-io_specs-mkql
+ purecalc-ut-lib
+)
+target_sources(ydb-library-yql-public-purecalc-io_specs-mkql-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test_spec.cpp
+)
+set_property(
+ TARGET
+ ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ PROPERTY
+ SPLIT_FACTOR
+ 1
+)
+add_yunittest(
+ NAME
+ ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ TEST_TARGET
+ ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ TEST_ARG
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ PROPERTY
+ LABELS
+ MEDIUM
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ PROPERTY
+ PROCESSORS
+ 1
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ PROPERTY
+ TIMEOUT
+ 300
+)
+target_allocator(ydb-library-yql-public-purecalc-io_specs-mkql-ut
+ system_allocator
+)
+vcs_info(ydb-library-yql-public-purecalc-io_specs-mkql-ut)
diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test.inl b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test.inl
new file mode 100644
index 0000000000..e148bb2bab
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test.inl
@@ -0,0 +1,777 @@
+Y_UNIT_TEST_SUITE(TEST_SUITE_NAME) {
+ using NYql::NPureCalc::NPrivate::GetSchema;
+
+ Y_UNIT_TEST(TestAllTypes) {
+ using namespace NYql::NPureCalc;
+
+ TVector<TString> fields {"int64", "uint64", "double", "bool", "string", "yson"};
+ auto schema = GetSchema(fields);
+ auto stream = GET_STREAM(fields);
+
+ auto factory = MakeProgramFactory();
+
+ {
+ auto program = CREATE_PROGRAM(
+ INPUT_SPEC {schema},
+ OUTPUT_SPEC {schema},
+ "SELECT * FROM Input",
+ ETranslationMode::SQL, 1
+ );
+
+ auto input = TStringStream(stream);
+ auto handle = program->Apply(&input);
+ TStringStream output;
+ handle->Run(&output);
+
+ ASSERT_EQUAL_STREAMS(stream, output);
+ }
+
+ // invalid table prefix
+ UNIT_ASSERT_EXCEPTION_CONTAINS([&](){
+ CREATE_PROGRAM(
+ INPUT_SPEC {schema},
+ OUTPUT_SPEC {schema},
+ "SELECT * FROM Table",
+ ETranslationMode::SQL, 1
+ );
+ }(), TCompileError, "Failed to optimize");
+
+ // invalid table suffix (input index)
+ UNIT_ASSERT_EXCEPTION_CONTAINS([&](){
+ CREATE_PROGRAM(
+ INPUT_SPEC {schema},
+ OUTPUT_SPEC {schema},
+ "SELECT * FROM Input1",
+ ETranslationMode::SQL, 1
+ );
+ }(), TCompileError, "Failed to optimize");
+ }
+
+ Y_UNIT_TEST(TestColumnsFilter) {
+ using namespace NYql::NPureCalc;
+
+ TVector<TString> fields {"int64", "uint64", "double", "bool", "string", "yson"};
+ auto schema = GetSchema(fields);
+ auto stream = GET_STREAM(fields);
+
+ TVector<TString> someFields {"int64", "bool", "string"};
+ auto someSchema = GetSchema(someFields);
+ auto someStream = GET_STREAM(someFields);
+
+ auto factory = MakeProgramFactory();
+
+ {
+ auto inputSpec = INPUT_SPEC {schema};
+ auto outputSpec = OUTPUT_SPEC {someSchema};
+
+ auto program = CREATE_PROGRAM(
+ inputSpec,
+ outputSpec,
+ "SELECT `int64`, `bool`, `string` FROM Input",
+ ETranslationMode::SQL, 1
+ );
+
+ UNIT_ASSERT_VALUES_EQUAL(
+ program->GetUsedColumns(),
+ THashSet<TString>(someFields.begin(), someFields.end())
+ );
+
+ UNIT_ASSERT_VALUES_EQUAL(
+ program->GetUsedColumns(0),
+ program->GetUsedColumns()
+ );
+
+ UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
+ auto unused = program->GetUsedColumns(1);
+ }()), yexception, "invalid input index (1) in GetUsedColumns call");
+
+ auto input = TStringStream(stream);
+ auto handle = program->Apply(&input);
+ TStringStream output;
+ handle->Run(&output);
+
+ ASSERT_EQUAL_STREAMS(someStream, output);
+
+ UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
+ auto outputs = TVector<IOutputStream*>({});
+ program->Apply(&input)->Run(outputs);
+ }()), yexception, "cannot be used with single-output programs");
+
+ UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
+ auto outputs = TVector<IOutputStream*>({&output});
+ program->Apply(&input)->Run(outputs);
+ }()), yexception, "cannot be used with single-output programs");
+
+ UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
+ auto outputs = TMap<TString, IOutputStream*>();
+ program->Apply(&input)->Run(outputs);
+ }()), yexception, "cannot be used with single-output programs");
+ }
+ }
+
+#ifdef PULL_LIST_MODE
+ Y_UNIT_TEST(TestColumnsFilterMultiInput) {
+ using namespace NYql::NPureCalc;
+
+ TVector<TString> fields0 {"int64", "uint64", "double"};
+ auto schema0 = GetSchema(fields0);
+ TVector<TString> someFields0 {"int64", "uint64"};
+
+ TVector<TString> fields1 {"bool", "string", "yson"};
+ auto schema1 = GetSchema(fields1);
+ TVector<TString> someFields1 {"bool", "yson"};
+
+ TVector<TString> unitedFields {"int64", "uint64", "bool", "yson"};
+ auto unitedSchema = GetSchema(unitedFields, unitedFields);
+
+ auto factory = MakeProgramFactory();
+
+ {
+ auto inputSpec = INPUT_SPEC {{schema0, schema1}};
+ auto outputSpec = OUTPUT_SPEC {unitedSchema};
+
+ auto program = CREATE_PROGRAM(
+ inputSpec,
+ outputSpec,
+ R"(
+SELECT `int64`, `uint64` FROM Input0
+UNION ALL
+SELECT `bool`, `yson` FROM Input1
+ )",
+ ETranslationMode::SQL, 1
+ );
+
+ UNIT_ASSERT_VALUES_EQUAL(
+ program->GetUsedColumns(0),
+ THashSet<TString>(someFields0.begin(), someFields0.end())
+ );
+
+ UNIT_ASSERT_VALUES_EQUAL(
+ program->GetUsedColumns(1),
+ THashSet<TString>(someFields1.begin(), someFields1.end())
+ );
+
+ UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
+ auto unused = program->GetUsedColumns();
+ }()), yexception, "GetUsedColumns() can be used only with single-input programs");
+
+ UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
+ auto unused = program->GetUsedColumns(2);
+ }()), yexception, "invalid input index (2) in GetUsedColumns call");
+ }
+ }
+#endif
+
+ Y_UNIT_TEST(TestColumnsFilterWithOptionalFields) {
+ using namespace NYql::NPureCalc;
+
+ TVector<TString> fields {"int64", "uint64", "double", "bool", "string", "yson"};
+ auto schema = GetSchema(fields);
+ auto stream = GET_STREAM(fields);
+
+ TVector<TString> someFields {"int64", "bool", "string"};
+ TVector<TString> someOptionalFields {"string"};
+
+ auto someSchema = GetSchema(someFields);
+ auto someStream = GET_STREAM(someFields, someOptionalFields);
+ auto someOptionalSchema = GetSchema(someFields, someOptionalFields);
+
+ auto factory = MakeProgramFactory();
+
+ {
+ auto program = CREATE_PROGRAM(
+ INPUT_SPEC {schema},
+ OUTPUT_SPEC {someOptionalSchema},
+ "SELECT `int64`, `bool`, Nothing(String?) as `string` FROM Input",
+ ETranslationMode::SQL, 1
+ );
+
+ UNIT_ASSERT_VALUES_EQUAL(
+ program->GetUsedColumns(),
+ THashSet<TString>({"int64", "bool"})
+ );
+
+ UNIT_ASSERT_VALUES_EQUAL(
+ program->GetUsedColumns(),
+ program->GetUsedColumns(0)
+ );
+
+ auto input = TStringStream(stream);
+ auto handle = program->Apply(&input);
+ TStringStream output;
+ handle->Run(&output);
+
+ ASSERT_EQUAL_STREAMS(someStream, output);
+ }
+
+ UNIT_ASSERT_EXCEPTION_CONTAINS([&](){
+ CREATE_PROGRAM(
+ INPUT_SPEC {schema},
+ OUTPUT_SPEC {someSchema},
+ "SELECT `int64`, `bool`, Nothing(String?) as `string` FROM Input",
+ ETranslationMode::SQL, 1
+ );
+ }(), TCompileError, "Failed to optimize");
+ }
+
+ Y_UNIT_TEST(TestOutputSpecInference) {
+ using namespace NYql::NPureCalc;
+
+ TVector<TString> fields {"int64", "uint64", "double", "bool", "string"};
+ auto schema = GetSchema(fields);
+ auto stream = GET_STREAM(fields);
+
+ TVector<TString> someFields {"bool", "int64", "string"}; // Keep this sorted...
+ auto someSchema = GetSchema(someFields);
+ auto someStream = GET_STREAM(someFields);
+
+ auto factory = MakeProgramFactory();
+
+ {
+ auto inputSpec = INPUT_SPEC {schema};
+ auto outputSpec = OUTPUT_SPEC {NYT::TNode::CreateEntity()};
+
+ auto program = CREATE_PROGRAM(
+ inputSpec,
+ outputSpec,
+ "SELECT `int64`, `bool`, `string` FROM Input",
+ ETranslationMode::SQL, 1
+ );
+
+ UNIT_ASSERT_EQUAL(program->MakeFullOutputSchema(), someSchema);
+
+ UNIT_ASSERT_VALUES_EQUAL(
+ program->GetUsedColumns(),
+ THashSet<TString>(someFields.begin(), someFields.end())
+ );
+
+ UNIT_ASSERT_VALUES_EQUAL(
+ program->GetUsedColumns(),
+ program->GetUsedColumns(0)
+ );
+
+ auto input = TStringStream(stream);
+ auto handle = program->Apply(&input);
+ TStringStream output;
+ handle->Run(&output);
+
+ ASSERT_EQUAL_STREAMS(someStream, output);
+ }
+ }
+
+#ifdef PULL_LIST_MODE
+ Y_UNIT_TEST(TestJoinInputs) {
+ using namespace NYql::NPureCalc;
+
+ TVector<TString> fields0 {"int64", "uint64", "double"};
+ auto schema0 = GetSchema(fields0);
+ auto stream0 = GET_STREAM(fields0);
+
+ TVector<TString> fields1 {"int64", "bool", "string"};
+ auto schema1 = GetSchema(fields1);
+ auto stream1 = GET_STREAM(fields1);
+
+ TVector<TString> joinedFields {"bool", "double", "int64", "string", "uint64"}; // keep this sorted
+ auto joinedSchema = GetSchema(joinedFields);
+ auto joinedStream = GET_STREAM(joinedFields);
+
+ auto factory = MakeProgramFactory();
+
+ {
+ auto inputSpec = INPUT_SPEC {{schema0, schema1}};
+ auto outputSpec = OUTPUT_SPEC {NYT::TNode::CreateEntity()};
+
+ auto program = CREATE_PROGRAM(
+ inputSpec,
+ outputSpec,
+ R"(
+SELECT
+ t0.`int64` AS `int64`,
+ t0.`uint64` AS `uint64`,
+ t0.`double` AS `double`,
+ t1.`bool` AS `bool`,
+ t1.`string` AS `string`
+FROM
+ Input0 AS t0
+INNER JOIN
+ Input1 AS t1
+ON t0.`int64` == t1.`int64`
+ORDER BY `int64`
+ )",
+ ETranslationMode::SQL, 1
+ );
+
+ UNIT_ASSERT_EQUAL(program->MakeFullOutputSchema(), joinedSchema);
+
+ UNIT_ASSERT_VALUES_EQUAL(
+ program->GetUsedColumns(0),
+ THashSet<TString>(fields0.begin(), fields0.end())
+ );
+
+ UNIT_ASSERT_VALUES_EQUAL(
+ program->GetUsedColumns(1),
+ THashSet<TString>(fields1.begin(), fields1.end())
+ );
+
+ TStringStream input0(stream0);
+ TStringStream input1(stream1);
+ auto handle = program->Apply<TVector<IInputStream*>>({&input0, &input1});
+ TStringStream output;
+ handle->Run(&output);
+
+ ASSERT_EQUAL_STREAMS(joinedStream, output);
+ }
+ }
+#endif
+
+ Y_UNIT_TEST(TestMultiOutputOverTuple) {
+ using namespace NYql::NPureCalc;
+
+ TVector<TString> fields {"int64", "uint64", "double", "bool", "string"};
+ auto schema = GetSchema(fields);
+ auto stream = GET_STREAM(fields, {}, 0, 10, 1);
+
+ TVector<TString> someFields1 {"bool", "int64", "string"};
+ auto someSchema1 = GetSchema(someFields1);
+ auto someStream1 = GET_STREAM(someFields1, {}, 0, 10, 2);
+
+ TVector<TString> someFields2 {"bool", "double"};
+ auto someSchema2 = GetSchema(someFields2);
+ auto someStream2 = GET_STREAM(someFields2, {}, 1, 10, 2);
+
+ auto factory = MakeProgramFactory();
+
+ {
+ auto inputSpec = INPUT_SPEC {schema};
+ auto outputSpec = OUTPUT_SPEC {NYT::TNode::CreateEntity()};
+
+ auto program = CREATE_PROGRAM(
+ inputSpec,
+ outputSpec,
+ R"(
+(
+ (let vt (ParseType '"Variant<Struct<bool:Bool, int64:Int64, string:String>, Struct<bool:Bool, double:Double>>"))
+ (return (Map (Self '0) (lambda '(x) (block '(
+ (let r1 (Variant (AsStruct '('bool (Member x 'bool)) '('int64 (Member x 'int64)) '('string (Member x 'string))) '0 vt))
+ (let r2 (Variant (AsStruct '('bool (Member x 'bool)) '('double (Member x 'double))) '1 vt))
+ (return (If (Coalesce (== (% (Member x 'int64) (Int64 '2)) (Int64 '0)) (Bool 'false)) r1 r2))
+ )))))
+)
+ )",
+ ETranslationMode::SExpr
+ );
+
+ auto input = TStringStream(stream);
+ auto handle = program->Apply(&input);
+ TStringStream output1, output2;
+ auto outputs = TVector<IOutputStream*>({&output1, &output2});
+ handle->Run(outputs);
+ ASSERT_EQUAL_STREAMS(someStream1, output1);
+ ASSERT_EQUAL_STREAMS(someStream2, output2);
+
+ UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
+ program->Apply(&input)->Run(&output1);
+ }()), yexception, "cannot be used with multi-output programs");
+
+ UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
+ auto outputs = TVector<IOutputStream*>({});
+ program->Apply(&input)->Run(outputs);
+ }()), yexception, "Number of variant alternatives should match number of streams");
+
+ UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
+ auto outputs = TVector<IOutputStream*>({&output1, &output1, &output1});
+ program->Apply(&input)->Run(outputs);
+ }()), yexception, "Number of variant alternatives should match number of streams");
+
+ UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
+ auto outputs = TMap<TString, IOutputStream*>();
+ program->Apply(&input)->Run(outputs);
+ }()), yexception, "cannot be used to process variants over tuple");
+ }
+ }
+
+ Y_UNIT_TEST(TestMultiOutputOverStruct) { // Splits one input into two named outputs through a Variant over a struct.
+ using namespace NYql::NPureCalc;
+ // Input: all five columns, rows 0..9.
+ TVector<TString> fields {"int64", "uint64", "double", "bool", "string"};
+ auto schema = GetSchema(fields);
+ auto stream = GET_STREAM(fields, {}, 0, 10, 1);
+ // Expected A1 output: even rows (0, 2, ..., 8) restricted to bool/int64/string.
+ TVector<TString> someFields1 {"bool", "int64", "string"};
+ auto someSchema1 = GetSchema(someFields1); // not referenced again in this test
+ auto someStream1 = GET_STREAM(someFields1, {}, 0, 10, 2);
+ // Expected A2 output: odd rows (1, 3, ..., 9) restricted to bool/double.
+ TVector<TString> someFields2 {"bool", "double"};
+ auto someSchema2 = GetSchema(someFields2); // not referenced again in this test
+ auto someStream2 = GET_STREAM(someFields2, {}, 1, 10, 2);
+
+ auto factory = MakeProgramFactory();
+
+ {
+ auto inputSpec = INPUT_SPEC {schema};
+ auto outputSpec = OUTPUT_SPEC {NYT::TNode::CreateEntity()};
+ // The s-expression wraps each row into alternative A1 when int64 is even and into A2 otherwise.
+ auto program = CREATE_PROGRAM(
+ inputSpec,
+ outputSpec,
+ R"(
+(
+ (let vt (ParseType '"Variant<A2:Struct<bool:Bool, double:Double>, A1:Struct<bool:Bool, int64:Int64, string:String>>"))
+ (return (Map (Self '0) (lambda '(x) (block '(
+ (let r1 (Variant (AsStruct '('bool (Member x 'bool)) '('int64 (Member x 'int64)) '('string (Member x 'string))) 'A1 vt))
+ (let r2 (Variant (AsStruct '('bool (Member x 'bool)) '('double (Member x 'double))) 'A2 vt))
+ (return (If (Coalesce (== (% (Member x 'int64) (Int64 '2)) (Int64 '0)) (Bool 'false)) r1 r2))
+ )))))
+)
+ )",
+ ETranslationMode::SExpr
+ );
+ // Happy path: one output stream registered per alternative name.
+ auto input = TStringStream(stream);
+ auto handle = program->Apply(&input);
+ TStringStream output1, output2;
+ auto outputs = TMap<TString, IOutputStream*>();
+ outputs["A1"] = &output1;
+ outputs["A2"] = &output2;
+ handle->Run(outputs);
+ ASSERT_EQUAL_STREAMS(someStream1, output1);
+ ASSERT_EQUAL_STREAMS(someStream2, output2);
+ // A single output stream must be rejected for a multi-output program.
+ UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
+ program->Apply(&input)->Run(&output1);
+ }()), yexception, "cannot be used with multi-output programs");
+ // A positional vector of streams must be rejected for a variant over a struct.
+ UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
+ auto outputs = TVector<IOutputStream*>({});
+ program->Apply(&input)->Run(outputs);
+ }()), yexception, "cannot be used to process variants over struct");
+ // Too few named streams (one for two alternatives).
+ UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
+ auto outputs = TMap<TString, IOutputStream*>();
+ outputs["A1"] = &output1;
+ program->Apply(&input)->Run(outputs);
+ }()), yexception, "Number of variant alternatives should match number of streams");
+ // Too many named streams (three for two alternatives).
+ UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
+ auto outputs = TMap<TString, IOutputStream*>();
+ outputs["A1"] = &output1;
+ outputs["A2"] = &output1;
+ outputs["A3"] = &output1;
+ program->Apply(&input)->Run(outputs);
+ }()), yexception, "Number of variant alternatives should match number of streams");
+ // Right number of streams, but none is registered under the name A2.
+ UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
+ auto outputs = TMap<TString, IOutputStream*>();
+ outputs["A1"] = &output1;
+ outputs["B1"] = &output1;
+ program->Apply(&input)->Run(outputs);
+ }()), yexception, "Cannot find stream for alternative \"A2\"");
+ }
+ }
+
+#ifdef GET_STREAM_WITH_STRUCT
+ Y_UNIT_TEST(TestReadNativeStructs) { // Compiled only when the including file defines GET_STREAM_WITH_STRUCT.
+ using namespace NYql::NPureCalc;
+ // Factory created with the NTCF_PRODUCTION native YT type flags.
+ auto factory = MakeProgramFactory(
+ TProgramFactoryOptions().SetNativeYtTypeFlags(NYql::NTCF_PRODUCTION)
+ );
+ // Concatenates two inputs built with the requested struct member order and returns the raw program output.
+ auto runProgram = [&factory](bool sorted) -> TStringStream {
+ auto inputSchema = GET_SCHEMA_WITH_STRUCT(sorted);
+ // Rows 0..1 go to the first input, rows 2..3 to the second.
+ auto input0 = GET_STREAM_WITH_STRUCT(sorted, 0, 2);
+ auto input1 = GET_STREAM_WITH_STRUCT(sorted, 2, 4);
+ // UseOriginalRowSpec is switched on only for the unsorted member order.
+ auto inputSpec = INPUT_SPEC{{inputSchema, inputSchema}}.SetUseOriginalRowSpec(!sorted);
+ auto outputSpec = OUTPUT_SPEC{NYT::TNode::CreateEntity()};
+
+ auto program = CREATE_PROGRAM(
+ inputSpec,
+ outputSpec,
+ R"(
+(
+ (return (Extend (Self '0) (Self '1)))
+)
+ )",
+ ETranslationMode::SExpr
+ );
+
+ TStringStream result;
+
+ auto handle = program->Apply(TVector<IInputStream*>({&input0, &input1}));
+ handle->Run(&result);
+
+ return result;
+ };
+ // Reference output: rows 0..3 in the sorted member order.
+ auto etalon = GET_STREAM_WITH_STRUCT(true, 0, 4);
+
+ auto output0 = runProgram(true);
+ auto output1 = runProgram(false);
+ // Both member orders on input must yield the same sorted-order output.
+ ASSERT_EQUAL_STREAMS(output0, etalon);
+ ASSERT_EQUAL_STREAMS(output1, etalon);
+ }
+#endif
+
+ Y_UNIT_TEST(TestIndependentProcessings) { // Two inputs are filtered separately and routed to two positional outputs.
+ using namespace NYql::NPureCalc;
+ // Input 0: rows 0..9 with double/int64/string.
+ TVector<TString> fields0 {"double", "int64", "string"}; // keep this sorted
+ auto schema0 = GetSchema(fields0);
+ auto stream0 = GET_STREAM(fields0, {}, 0, 10, 1);
+ // Expected output 0: int64/string columns only.
+ TVector<TString> someFields0 {"int64", "string"};
+ auto someStream0 = GET_STREAM(someFields0, {}, 0, 10, 2); // sample with even int64 numbers
+ // Input 1: rows 0..9 with bool/int64/uint64.
+ TVector<TString> fields1 {"bool", "int64", "uint64"}; // keep this sorted
+ auto schema1 = GetSchema(fields1);
+ auto stream1 = GET_STREAM(fields1, {}, 0, 10, 1);
+ // Expected output 1: int64/uint64 columns only.
+ TVector<TString> someFields1 {"int64", "uint64"};
+ auto someStream1 = GET_STREAM(someFields1, {}, 1, 10, 2); // sample with odd int64 numbers
+
+ auto factory = MakeProgramFactory();
+
+ {
+ auto inputSpec = INPUT_SPEC {{schema0, schema1}};
+ auto outputSpec = OUTPUT_SPEC {NYT::TNode::CreateEntity()};
+ // Input 0 keeps even int64 rows as alternative 0; input 1 keeps odd int64 rows as alternative 1.
+ auto program = CREATE_PROGRAM(
+ inputSpec,
+ outputSpec,
+ R"(
+(
+ (let $type (ParseType '"Variant<Struct<int64: Int64, string:String>, Struct<int64:Int64, uint64: Uint64>>"))
+ (let $stream0 (FlatMap (Self '0) (lambda '(x) (block '(
+ (let $item (Variant (AsStruct '('int64 (Member x 'int64)) '('string (Member x 'string))) '0 $type))
+ (return (ListIf (Coalesce (== (% (Member x 'int64) (Int64 '2)) (Int64 '0)) (Bool 'false)) $item))
+ )))))
+ (let $stream1 (FlatMap (Self '1) (lambda '(x) (block '(
+ (let $item (Variant (AsStruct '('int64 (Member x 'int64)) '('uint64 (Member x 'uint64))) '1 $type))
+ (return (ListIf (Coalesce (== (% (Member x 'int64) (Int64 '2)) (Int64 '1)) (Bool 'false)) $item))
+ )))))
+ (return (Extend $stream0 $stream1))
+)
+ )",
+ ETranslationMode::SExpr
+ );
+ // Input schemas are reported per input index.
+ UNIT_ASSERT_EQUAL(program->MakeInputSchema(0), schema0);
+ UNIT_ASSERT_EQUAL(program->MakeInputSchema(1), schema1);
+ // An out-of-range input index must throw.
+ UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
+ auto unused = program->MakeInputSchema(2);
+ }()), yexception, "invalid input index (2) in MakeInputSchema call");
+ // The index-less overload must throw for a program with two inputs.
+ UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
+ auto unused = program->MakeInputSchema();
+ }()), yexception, "MakeInputSchema() can be used only with single-input programs");
+ // Happy path: two input streams in, two positional output streams out.
+ TStringStream input0(stream0);
+ TStringStream input1(stream1);
+ auto handle = program->Apply(TVector<IInputStream*>({&input0, &input1}));
+ TStringStream output0, output1;
+ handle->Run(TVector<IOutputStream*>({&output0, &output1}));
+
+ ASSERT_EQUAL_STREAMS(someStream0, output0);
+ ASSERT_EQUAL_STREAMS(someStream1, output1);
+ // Apply() with zero input streams must throw.
+ UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
+ auto unused = program->Apply(TVector<IInputStream*>());
+ }()), yexception, "number of input streams should match number of inputs");
+ // Apply() with one input stream must throw.
+ UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
+ auto unused = program->Apply(TVector<IInputStream*>({&input0}));
+ }()), yexception, "number of input streams should match number of inputs");
+ // Apply() with three input streams must throw.
+ UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
+ TStringStream input2;
+ auto unused = program->Apply(TVector<IInputStream*>({&input0, &input1, &input2}));
+ }()), yexception, "number of input streams should match number of inputs");
+ // The single-stream Apply() overload must throw as well.
+ UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
+ auto unused = program->Apply(&input0);
+ }()), yexception, "number of input streams should match number of inputs");
+ }
+ }
+
+ Y_UNIT_TEST(TestMergeInputs) { // Two inputs with different schemas are trimmed to common columns and concatenated into one output.
+ using namespace NYql::NPureCalc;
+ // Input 0: rows 0..4; "string" is the column input 1 does not have.
+ TVector<TString> fields0 {"double", "int64", "string", "uint64"}; // keep this sorted
+ auto schema0 = GetSchema(fields0);
+ auto stream0 = GET_STREAM(fields0, {}, 0, 5, 1);
+ // Input 1: rows 5..9; "yson" is the column input 0 does not have.
+ TVector<TString> fields1 {"double", "int64", "uint64", "yson"}; // keep this sorted
+ auto schema1 = GetSchema(fields1);
+ auto stream1 = GET_STREAM(fields1, {}, 5, 10, 1);
+ // Expected output: rows 0..9 with only the columns shared by both inputs.
+ TVector<TString> someFields {"double", "int64", "uint64"}; // keep this sorted
+ auto mergedStream = GET_STREAM(someFields, {}, 0, 10, 1);
+ auto mergedSchema = GetSchema(someFields);
+
+ auto factory = MakeProgramFactory();
+
+ {
+ auto inputSpec = INPUT_SPEC {{schema0, schema1}};
+ auto outputSpec = OUTPUT_SPEC {NYT::TNode::CreateEntity()};
+ // Removes the input-specific column from each stream, then extends stream 0 with stream 1.
+ auto program = CREATE_PROGRAM(
+ inputSpec,
+ outputSpec,
+ R"(
+(
+ (let $stream0 (Map (Self '0) (lambda '(x) (RemoveMember x 'string))))
+ (let $stream1 (Map (Self '1) (lambda '(x) (RemoveMember x 'yson))))
+ (return (Extend $stream0 $stream1))
+)
+ )",
+ ETranslationMode::SExpr
+ );
+ // Input schemas stay as given; the full output schema equals the common-column schema.
+ UNIT_ASSERT_EQUAL(program->MakeInputSchema(0), schema0);
+ UNIT_ASSERT_EQUAL(program->MakeInputSchema(1), schema1);
+ UNIT_ASSERT_EQUAL(program->MakeFullOutputSchema(), mergedSchema);
+ // Run with both inputs and compare the single output with the expected stream.
+ TStringStream input0(stream0);
+ TStringStream input1(stream1);
+ auto handle = program->Apply(TVector<IInputStream*>({&input0, &input1}));
+ TStringStream output;
+ handle->Run(&output);
+
+ ASSERT_EQUAL_STREAMS(mergedStream, output);
+ }
+ }
+
+ Y_UNIT_TEST(TestTableName) { // TableName() over a single input with no custom table names.
+ using namespace NYql::NPureCalc;
+ // One table holding the int64 values 3 and 5.
+ TVector<TVector<int>> values = {{3, 5}};
+ // The expected stream carries the same rows plus a tname column equal to "Input".
+ auto inputSchema = GetSchema({"int64"});
+ auto stream = GET_MULTITABLE_STREAM(values);
+ auto etalon = GET_MULTITABLE_STREAM(values, {"Input"});
+ // The factory is created with system columns switched on.
+ auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseSystemColumns(true));
+
+ {
+ auto program = CREATE_PROGRAM(
+ INPUT_SPEC(inputSchema),
+ OUTPUT_SPEC(NYT::TNode::CreateEntity()),
+ "SELECT `int64`, TableName() AS `tname` FROM Input",
+ ETranslationMode::SQL
+ );
+ // Run and compare with the expected stream.
+ auto handle = program->Apply(&stream);
+ TStringStream output;
+ handle->Run(&output);
+
+ ASSERT_EQUAL_STREAMS(output, etalon);
+ }
+ }
+
+ Y_UNIT_TEST(TestCustomTableName) { // TableName() over a single input whose tables are named through SetTableNames().
+ using namespace NYql::NPureCalc;
+ // Two tables of two rows each, named "One" and "Two" respectively.
+ TVector<TVector<int>> values = {{3, 5}, {2, 8}};
+ TVector<TString> tableNames = {"One", "Two"};
+ // The expected stream carries every row plus a tname column with its table's name.
+ auto inputSchema = GetSchema({"int64"});
+ auto stream = GET_MULTITABLE_STREAM(values);
+ auto etalon = GET_MULTITABLE_STREAM(values, tableNames);
+ // The factory is created with system columns switched on.
+ auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseSystemColumns(true));
+
+ {
+ auto program = CREATE_PROGRAM(
+ INPUT_SPEC(inputSchema).SetTableNames(tableNames),
+ OUTPUT_SPEC(NYT::TNode::CreateEntity()),
+ "SELECT `int64`, TableName() AS `tname` FROM TABLES()",
+ ETranslationMode::SQL
+ );
+ // Run and compare with the expected stream.
+ auto handle = program->Apply(&stream);
+ TStringStream output;
+ handle->Run(&output);
+
+ ASSERT_EQUAL_STREAMS(output, etalon);
+ }
+ }
+
+#ifdef PULL_LIST_MODE
+ Y_UNIT_TEST(TestMultiinputTableName) { // TableName() over two inputs addressed as Input0 and Input1 (inside #ifdef PULL_LIST_MODE).
+ using namespace NYql::NPureCalc;
+ // One single-table stream per input.
+ TVector<TVector<int>> values0 = {{3, 5}};
+ TVector<TVector<int>> values1 = {{7, 9}};
+ // The expected stream lists input 0 rows tagged "Input0", then input 1 rows tagged "Input1".
+ auto inputSchema = GetSchema({"int64"});
+ auto stream0 = GET_MULTITABLE_STREAM(values0);
+ auto stream1 = GET_MULTITABLE_STREAM(values1);
+ auto etalon = GET_MULTITABLE_STREAM(JoinVectors(values0, values1), {"Input0", "Input1"});
+ // The factory is created with system columns switched on.
+ auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseSystemColumns(true));
+
+ {
+ auto program = CREATE_PROGRAM(
+ INPUT_SPEC({inputSchema, inputSchema}),
+ OUTPUT_SPEC(NYT::TNode::CreateEntity()),
+ R"(
+$union = (
+ SELECT * FROM Input0
+ UNION ALL
+ SELECT * FROM Input1
+);
+SELECT TableName() AS `tname`, `int64` FROM $union
+ )"
+ );
+ // No ETranslationMode argument is passed to CREATE_PROGRAM above.
+ auto handle = program->Apply(TVector<IInputStream*>{&stream0, &stream1});
+ TStringStream output;
+ handle->Run(&output);
+
+ ASSERT_EQUAL_STREAMS(output, etalon);
+ }
+ }
+
+ Y_UNIT_TEST(TestMultiinputCustomTableName) { // TableName() over two inputs, each with its own custom table names (inside #ifdef PULL_LIST_MODE).
+ using namespace NYql::NPureCalc;
+ // Two tables per input; names are assigned per input index below.
+ TVector<TVector<int>> values0 = {{1, 4}, {2, 8}};
+ TVector<TVector<int>> values1 = {{3, 5}, {7, 9}};
+ TVector<TString> tableNames0 = {"OneA", "TwoA"};
+ TVector<TString> tableNames1 = {"OneB", "TwoB"};
+ // The expected stream lists all tables of input 0, then all tables of input 1, each row tagged with its table name.
+ auto inputSchema = GetSchema({"int64"});
+ auto stream0 = GET_MULTITABLE_STREAM(values0);
+ auto stream1 = GET_MULTITABLE_STREAM(values1);
+ auto etalon = GET_MULTITABLE_STREAM(JoinVectors(values0, values1), JoinVectors(tableNames0, tableNames1));
+ // The factory is created with system columns switched on.
+ auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseSystemColumns(true));
+
+ {
+ auto program = CREATE_PROGRAM(
+ INPUT_SPEC({inputSchema, inputSchema}).SetTableNames(tableNames0, 0).SetTableNames(tableNames1, 1),
+ OUTPUT_SPEC(NYT::TNode::CreateEntity()),
+ R"(
+$input0, $input1 = PROCESS TABLES();
+$union = (
+ SELECT * FROM $input0
+ UNION ALL
+ SELECT * FROM $input1
+);
+SELECT TableName() AS `tname`, `int64` FROM $union
+ )"
+ );
+ // No ETranslationMode argument is passed to CREATE_PROGRAM above.
+ auto handle = program->Apply(TVector<IInputStream*>{&stream0, &stream1});
+ TStringStream output;
+ handle->Run(&output);
+
+ ASSERT_EQUAL_STREAMS(output, etalon);
+ }
+ }
+#endif
+}
diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test_spec.cpp b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test_spec.cpp
new file mode 100644
index 0000000000..255e815e8f
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test_spec.cpp
@@ -0,0 +1,325 @@
+#include <ydb/library/yql/providers/common/codec/yql_codec_type_flags.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+#include <library/cpp/yson/writer.h>
+
+#include <library/cpp/yson/node/node.h>
+#include <library/cpp/yson/node/node_io.h>
+#include <library/cpp/yson/node/node_visitor.h>
+
+#include <ydb/library/yql/public/purecalc/common/interface.h>
+#include <ydb/library/yql/public/purecalc/io_specs/mkql/spec.h>
+#include <ydb/library/yql/public/purecalc/ut/lib/helpers.h>
+
+#include <util/generic/hash_set.h>
+#include <util/generic/ptr.h>
+#include <util/stream/str.h>
+
+#include <library/cpp/skiff/skiff.h>
+
+#include <util/generic/yexception.h>
+
+
+namespace {
+    // Builds a binary YSON list fragment with one map node per generated row.
+    //
+    // Row indices run from `start` (inclusive) to `stop` (exclusive) in increments of
+    // `step`. A column is emitted only when it is named in `fields` and is NOT named in
+    // `optionalFields`; columns listed as optional are left out of the map altogether.
+    TStringStream GetYsonStream(
+        const TVector<TString>& fields,
+        const TVector<TString>& optionalFields={},
+        ui32 start = 0, ui32 stop = 5, ui32 step = 1
+    ) {
+        const THashSet<TString> requested(fields.begin(), fields.end());
+        const THashSet<TString> omitted(optionalFields.begin(), optionalFields.end());
+
+        // Stores `value` under `name` unless the column is filtered out.
+        const auto putColumn = [&requested, &omitted] (
+            NYT::TNode& row, const TString& name, NYT::TNode&& value
+        ) {
+            if (!requested.contains(name) || omitted.contains(name)) {
+                return;
+            }
+            row(name, value);
+        };
+
+        TStringStream out;
+        NYson::TYsonWriter ysonWriter(&out, NYson::EYsonFormat::Binary, NYson::EYsonType::ListFragment);
+        NYT::TNodeVisitor nodeVisitor(&ysonWriter);
+
+        for (ui32 rowIndex = start; rowIndex < stop; rowIndex += step) {
+            auto row = NYT::TNode::CreateMap();
+
+            // Column values are derived from the row index only.
+            putColumn(row, "int64", static_cast<i64>(rowIndex));
+            putColumn(row, "uint64", static_cast<ui64>(rowIndex * 2));
+            putColumn(row, "double", static_cast<double>(rowIndex * 3.5));
+            putColumn(row, "bool", true);
+            putColumn(row, "string", "foo");
+            putColumn(row, "yson", NYT::TNode(rowIndex % 2 == 0));
+
+            nodeVisitor.Visit(row);
+        }
+
+        return out;
+    }
+
+    // Builds a binary YSON list fragment holding the int64 rows of several tables.
+    //
+    // Input mode (`etalonTableNames` empty): each group is preceded by an entity node
+    // carrying a `table_index` attribute, and rows contain only `int64`.
+    // Expected-output mode (`etalonTableNames` given, one name per group): no index nodes
+    // are written and every row additionally gets a `tname` column with its group's name.
+    TStringStream GetMultitableYsonStream(
+        const TVector<TVector<int>>& groupedValues,
+        const TVector<TString>& etalonTableNames = {}
+    ) {
+        const bool isEtalon = !etalonTableNames.empty();
+
+        Y_ENSURE(!isEtalon || groupedValues.size() == etalonTableNames.size());
+
+        TStringStream out;
+        NYson::TYsonWriter ysonWriter(&out, NYson::EYsonFormat::Binary, NYson::EYsonType::ListFragment);
+        NYT::TNodeVisitor nodeVisitor(&ysonWriter);
+
+        ui64 tableIndex = 0;
+        for (const auto& group : groupedValues) {
+            if (!isEtalon) {
+                // Table switch marker: an entity whose attributes name the table that follows.
+                auto marker = NYT::TNode::CreateEntity();
+                marker.Attributes() = NYT::TNode::CreateMap()("table_index", static_cast<i64>(tableIndex));
+                nodeVisitor.Visit(marker);
+            }
+
+            for (const int& value : group) {
+                auto row = NYT::TNode::CreateMap()("int64", value);
+                if (isEtalon) {
+                    row("tname", etalonTableNames[tableIndex]);
+                }
+                nodeVisitor.Visit(row);
+            }
+
+            ++tableIndex;
+        }
+
+        return out;
+    }
+
+    // Parses both streams as YSON list fragments and asserts that the resulting nodes are
+    // equal. The streams are taken by value, so reading them here does not consume the
+    // caller's copies.
+    void AssertEqualYsonStreams(TStringStream etalonStream, TStringStream stream) {
+        const auto parseListFragment = [](TStringStream& source) -> NYT::TNode {
+            return NYT::NodeFromYsonStream(&source, NYson::EYsonType::ListFragment);
+        };
+
+        // The expected stream is parsed first, then the actual one.
+        const NYT::TNode etalonList = parseListFragment(etalonStream);
+        const NYT::TNode list = parseListFragment(stream);
+
+        UNIT_ASSERT_EQUAL(etalonList, list);
+    }
+
+    // Builds a Skiff stream with one row per index from `start` (inclusive) to `stop`
+    // (exclusive), advancing by `step`.
+    //
+    // Every row starts with variant16 tag 0, followed by the columns named in `fields`,
+    // always in the fixed order bool, double, int64, string, uint64, yson. A column that is
+    // also named in `optionalFields` is written as a lone variant8 tag 0 instead of a value.
+    //
+    // NOTE(review): `writer` is destroyed only after the return value has been initialised
+    // from `stream`; if the writer buffers output until destruction, the result depends on
+    // copy elision of `stream` — confirm against the Skiff writer implementation.
+    TStringStream GetSkiffStream(
+        const TVector<TString>& fields,
+        const TVector<TString>& optionalFields={},
+        ui32 start = 0, ui32 stop = 5, ui32 step = 1
+    ) {
+        THashSet<TString> filter {fields.begin(), fields.end()};
+        THashSet<TString> optionalFilter {optionalFields.begin(), optionalFields.end()};
+
+        TStringStream stream;
+        NSkiff::TUncheckedSkiffWriter writer {&stream};
+
+// Writes `value` through writer.Write<type>() when `field` is requested. A requested field
+// that is also optional gets variant8 tag 0 instead; an unrequested field writes nothing.
+#define WRITE_FIELD(field, type, value) \
+    do { \
+        if (filter.contains(field)) { \
+            if (optionalFilter.contains(field)) { \
+                writer.WriteVariant8Tag(0); \
+            } else { \
+                writer.Write ## type(value); \
+            } \
+        } \
+    } while (0)
+
+        for (ui32 i = start; i < stop; i += step) {
+            // Rows go straight to the Skiff writer; the per-row NYT::TNode map that used to
+            // be created here was never read and has been removed.
+            writer.WriteVariant16Tag(0);
+            WRITE_FIELD("bool", Boolean, true);
+            WRITE_FIELD("double", Double, (double)(i * 3.5));
+            WRITE_FIELD("int64", Int64, (i64)(i));
+            WRITE_FIELD("string", String32, "foo");
+            WRITE_FIELD("uint64", Uint64, (ui64)(i * 2));
+            WRITE_FIELD("yson", Yson32, (i % 2 == 0 ? "\x05" : "\x04")); // boolean values
+        }
+
+#undef WRITE_FIELD
+
+        return stream;
+    }
+
+    // Builds a Skiff stream holding the int64 rows of several tables.
+    //
+    // Input mode (`etalonTableNames` empty): each row is tagged with the index of the group
+    // it comes from and carries only the int64 value.
+    // Expected-output mode (`etalonTableNames` given, one name per group): every row is
+    // tagged 0 and the int64 value is followed by the group's name as a string32.
+    TStringStream GetMultitableSkiffStream(
+        const TVector<TVector<int>>& groupedValues,
+        const TVector<TString>& etalonTableNames = {}
+    ) {
+        const bool isEtalon = !etalonTableNames.empty();
+
+        Y_ENSURE(!isEtalon || groupedValues.size() == etalonTableNames.size());
+
+        TStringStream out;
+        NSkiff::TUncheckedSkiffWriter skiffWriter {&out};
+
+        for (ui64 tableIndex = 0; tableIndex < groupedValues.size(); ++tableIndex) {
+            // The tag is the same for every row of one group.
+            const ui64 rowTag = isEtalon ? 0 : tableIndex;
+
+            for (const int value : groupedValues[tableIndex]) {
+                skiffWriter.WriteVariant16Tag(rowTag);
+                skiffWriter.WriteInt64(value);
+                if (isEtalon) {
+                    skiffWriter.WriteString32(etalonTableNames[tableIndex]);
+                }
+            }
+        }
+
+        return out;
+    }
+
+    // Builds the schema node for a table with two columns, in this order:
+    //   Index  : Uint64
+    //   Struct : struct with members a (String) and b (Uint64)
+    // `sorted` selects the member order inside the struct: a, b when true; b, a when false.
+    NYT::TNode GetSkiffSchemaWithStruct(bool sorted) {
+        // Produces [name, ["DataType", dataType]].
+        const auto dataField = [](const char* name, const char* dataType) -> NYT::TNode {
+            return NYT::TNode::CreateList()
+                .Add(name)
+                .Add(NYT::TNode::CreateList().Add("DataType").Add(dataType));
+        };
+
+        auto memberA = dataField("a", "String");
+        auto memberB = dataField("b", "Uint64");
+
+        auto structMembers = NYT::TNode::CreateList();
+        if (sorted) {
+            structMembers.Add(std::move(memberA)).Add(std::move(memberB));
+        } else {
+            structMembers.Add(std::move(memberB)).Add(std::move(memberA));
+        }
+
+        auto structColumn = NYT::TNode::CreateList()
+            .Add("Struct")
+            .Add(NYT::TNode::CreateList().Add("StructType").Add(std::move(structMembers)));
+
+        auto indexColumn = dataField("Index", "Uint64");
+
+        auto columns = NYT::TNode::CreateList()
+            .Add(std::move(indexColumn))
+            .Add(std::move(structColumn));
+
+        return NYT::TNode::CreateList()
+            .Add("StructType")
+            .Add(std::move(columns));
+    }
+
+    // Builds a Skiff stream with one row per index from `start` (inclusive) to `stop`
+    // (exclusive).
+    //
+    // Each row is: variant16 tag 0, the index as uint64, then the two struct members,
+    // the string "text<idx>" and the number idx + 3. `sorted` picks the member order:
+    // string then number when true, number then string when false.
+    TStringStream GetSkiffStreamWithStruct(bool sorted, ui32 start = 0, ui32 stop = 5) {
+        TStringStream out;
+        NSkiff::TUncheckedSkiffWriter skiffWriter {&out};
+
+        for (ui32 idx = start; idx < stop; ++idx) {
+            const TString text = TStringBuilder{} << "text" << idx;
+            const ui64 number = idx + 3;
+
+            skiffWriter.WriteVariant16Tag(0);
+            skiffWriter.WriteUint64(idx);
+
+            if (!sorted) {
+                skiffWriter.WriteUint64(number);
+                skiffWriter.WriteString32(text);
+                continue;
+            }
+
+            skiffWriter.WriteString32(text);
+            skiffWriter.WriteUint64(number);
+        }
+
+        return out;
+    }
+
+ void AssertEqualSkiffStreams(TStringStream etalonStream, TStringStream stream) { // Asserts that both streams hold exactly the same bytes.
+ UNIT_ASSERT_VALUES_EQUAL(etalonStream.Str(), stream.Str());
+ }
+}
+
+// Returns a new vector holding the elements of `first` followed by those of `second`.
+// Neither argument is modified.
+template <typename T>
+TVector<T> JoinVectors(const TVector<T>& first, const TVector<T>& second) {
+    TVector<T> joined;
+    // One allocation up front for the combined size.
+    joined.reserve(first.size() + second.size());
+
+    joined.assign(first.begin(), first.end());
+    joined.insert(joined.end(), second.begin(), second.end());
+
+    return joined;
+}
+// Inclusion 1 of 4: pull-stream programs with YSON specs. GET_STREAM_WITH_STRUCT is not defined here, so code under #ifdef GET_STREAM_WITH_STRUCT is excluded.
+#define PULL_STREAM_MODE
+#define TEST_SUITE_NAME TestPullStreamYsonIO
+#define CREATE_PROGRAM(...) factory->MakePullStreamProgram(__VA_ARGS__)
+#define INPUT_SPEC TYsonInputSpec
+#define OUTPUT_SPEC TYsonOutputSpec
+#define GET_STREAM GetYsonStream
+#define GET_MULTITABLE_STREAM GetMultitableYsonStream
+#define ASSERT_EQUAL_STREAMS AssertEqualYsonStreams
+#include "test.inl" // presumably expands into a suite named by TEST_SUITE_NAME; confirm in test.inl
+#undef ASSERT_EQUAL_STREAMS
+#undef GET_MULTITABLE_STREAM
+#undef GET_STREAM
+#undef OUTPUT_SPEC
+#undef INPUT_SPEC
+#undef CREATE_PROGRAM
+#undef TEST_SUITE_NAME
+#undef PULL_STREAM_MODE
+// Inclusion 2 of 4: pull-stream programs with Skiff specs. The struct helpers are bound, so code under #ifdef GET_STREAM_WITH_STRUCT is compiled.
+#define PULL_STREAM_MODE
+#define TEST_SUITE_NAME TestPullStreamSkiffIO
+#define CREATE_PROGRAM(...) factory->MakePullStreamProgram(__VA_ARGS__)
+#define INPUT_SPEC TSkiffInputSpec
+#define OUTPUT_SPEC TSkiffOutputSpec
+#define GET_STREAM GetSkiffStream
+#define GET_STREAM_WITH_STRUCT GetSkiffStreamWithStruct
+#define GET_SCHEMA_WITH_STRUCT GetSkiffSchemaWithStruct
+#define GET_MULTITABLE_STREAM GetMultitableSkiffStream
+#define ASSERT_EQUAL_STREAMS AssertEqualSkiffStreams
+#include "test.inl" // presumably expands into a suite named by TEST_SUITE_NAME; confirm in test.inl
+#undef ASSERT_EQUAL_STREAMS
+#undef GET_MULTITABLE_STREAM
+#undef GET_SCHEMA_WITH_STRUCT
+#undef GET_STREAM_WITH_STRUCT
+#undef GET_STREAM
+#undef OUTPUT_SPEC
+#undef INPUT_SPEC
+#undef CREATE_PROGRAM
+#undef TEST_SUITE_NAME
+#undef PULL_STREAM_MODE
+// Inclusion 3 of 4: pull-list programs with YSON specs. PULL_LIST_MODE is defined, so code under #ifdef PULL_LIST_MODE is compiled; the struct helpers are not bound.
+#define PULL_LIST_MODE
+#define TEST_SUITE_NAME TestPullListYsonIO
+#define CREATE_PROGRAM(...) factory->MakePullListProgram(__VA_ARGS__)
+#define INPUT_SPEC TYsonInputSpec
+#define OUTPUT_SPEC TYsonOutputSpec
+#define GET_STREAM GetYsonStream
+#define GET_MULTITABLE_STREAM GetMultitableYsonStream
+#define ASSERT_EQUAL_STREAMS AssertEqualYsonStreams
+#include "test.inl" // presumably expands into a suite named by TEST_SUITE_NAME; confirm in test.inl
+#undef ASSERT_EQUAL_STREAMS
+#undef GET_MULTITABLE_STREAM
+#undef GET_STREAM
+#undef OUTPUT_SPEC
+#undef INPUT_SPEC
+#undef CREATE_PROGRAM
+#undef TEST_SUITE_NAME
+#undef PULL_LIST_MODE
+// Inclusion 4 of 4: pull-list programs with Skiff specs. Both PULL_LIST_MODE and the struct helpers are defined, so both guarded groups of code are compiled.
+#define PULL_LIST_MODE
+#define TEST_SUITE_NAME TestPullListSkiffIO
+#define CREATE_PROGRAM(...) factory->MakePullListProgram(__VA_ARGS__)
+#define INPUT_SPEC TSkiffInputSpec
+#define OUTPUT_SPEC TSkiffOutputSpec
+#define GET_STREAM GetSkiffStream
+#define GET_STREAM_WITH_STRUCT GetSkiffStreamWithStruct
+#define GET_SCHEMA_WITH_STRUCT GetSkiffSchemaWithStruct
+#define GET_MULTITABLE_STREAM GetMultitableSkiffStream
+#define ASSERT_EQUAL_STREAMS AssertEqualSkiffStreams
+#include "test.inl" // presumably expands into a suite named by TEST_SUITE_NAME; confirm in test.inl
+#undef ASSERT_EQUAL_STREAMS
+#undef GET_MULTITABLE_STREAM
+#undef GET_SCHEMA_WITH_STRUCT
+#undef GET_STREAM_WITH_STRUCT
+#undef GET_STREAM
+#undef OUTPUT_SPEC
+#undef INPUT_SPEC
+#undef CREATE_PROGRAM
+#undef TEST_SUITE_NAME
+#undef PULL_LIST_MODE
diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/ut/ya.make b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/ya.make
new file mode 100644
index 0000000000..afc48d4356
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/ya.make
@@ -0,0 +1,20 @@
+UNITTEST()
+# Unit-test module for the mkql I/O specs; its only source is test_spec.cpp.
+SIZE(MEDIUM)
+
+TIMEOUT(300)
+# Dependencies: the purecalc core, the mkql specs under test, shared test helpers, and the UDF exception policy.
+PEERDIR(
+ ydb/library/yql/public/udf/service/exception_policy
+ ydb/library/yql/public/purecalc/common
+ ydb/library/yql/public/purecalc/io_specs/mkql
+ ydb/library/yql/public/purecalc/ut/lib
+)
+
+YQL_LAST_ABI_VERSION()
+
+SRCS(
+ test_spec.cpp
+)
+
+END()
diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/ya.make b/ydb/library/yql/public/purecalc/io_specs/mkql/ya.make
new file mode 100644
index 0000000000..b6066163f7
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/mkql/ya.make
@@ -0,0 +1,25 @@
+LIBRARY()
+# Library module built from spec.cpp and spec.h; depends on the purecalc core plus YT and common provider codecs.
+PEERDIR(
+ ydb/library/yql/public/purecalc/common
+ ydb/library/yql/providers/yt/codec
+ ydb/library/yql/providers/yt/common
+ ydb/library/yql/providers/yt/lib/mkql_helpers
+ ydb/library/yql/providers/common/codec
+ ydb/library/yql/providers/common/schema/mkql
+)
+
+
+ YQL_LAST_ABI_VERSION()
+
+
+SRCS(
+ spec.cpp
+ spec.h
+)
+
+END()
+# The ut subdirectory is visited only when tests are being built.
+RECURSE_FOR_TESTS(
+ ut
+)
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..b21a40ca76
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,24 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(ut)
+# Library target for the typed protobuf I/O specs; all link dependencies are PUBLIC.
+add_library(purecalc-io_specs-protobuf)
+target_compile_options(purecalc-io_specs-protobuf PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(purecalc-io_specs-protobuf PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ public-purecalc-common
+ purecalc-io_specs-protobuf_raw
+)
+target_sources(purecalc-io_specs-protobuf PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..8dc53c6230
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,25 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(ut)
+# Library target for the typed protobuf I/O specs; this Linux variant additionally links contrib-libs-linux-headers.
+add_library(purecalc-io_specs-protobuf)
+target_compile_options(purecalc-io_specs-protobuf PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(purecalc-io_specs-protobuf PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ public-purecalc-common
+ purecalc-io_specs-protobuf_raw
+)
+target_sources(purecalc-io_specs-protobuf PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..8dc53c6230
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,25 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(ut)
+# Library target for the typed protobuf I/O specs; this Linux variant additionally links contrib-libs-linux-headers.
+add_library(purecalc-io_specs-protobuf)
+target_compile_options(purecalc-io_specs-protobuf PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(purecalc-io_specs-protobuf PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ public-purecalc-common
+ purecalc-io_specs-protobuf_raw
+)
+target_sources(purecalc-io_specs-protobuf PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) # picks the per-platform listfile; there is no else() branch, so an unmatched platform includes nothing
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..b21a40ca76
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,24 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(ut)
+# Library target for the typed protobuf I/O specs; all link dependencies are PUBLIC.
+add_library(purecalc-io_specs-protobuf)
+target_compile_options(purecalc-io_specs-protobuf PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(purecalc-io_specs-protobuf PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ public-purecalc-common
+ purecalc-io_specs-protobuf_raw
+)
+target_sources(purecalc-io_specs-protobuf PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.cpp b/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.cpp
new file mode 100644
index 0000000000..90f0b339ca
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.cpp
@@ -0,0 +1 @@
+#include "proto_variant.h"
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.h b/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.h
new file mode 100644
index 0000000000..c7d137d0e6
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.h
@@ -0,0 +1,80 @@
+#pragma once
+
+#include <ydb/library/yql/public/purecalc/common/interface.h>
+
+#include <array>
+
+namespace NYql::NPureCalc::NPrivate {
+ using TProtoRawMultiOutput = std::pair<ui32, google::protobuf::Message*>; // (index, untyped message); `first` is used to index InitFuncs_ in the mapping classes of this header
+ // Typed counterpart: a variant holding a pointer to exactly one of the message types T...
+ template <typename... T>
+ using TProtoMultiOutput = std::variant<T*...>;
+ // Pointer to the I-th type of the pack T...
+ template <size_t I, typename... T>
+ using TProtoOutput = std::add_pointer_t<typename TTypeList<T...>::template TGet<I>>;
+
+    // Wraps an untyped message pointer into alternative I of the typed variant.
+    // The pointer is static_cast to the I-th type of T...; no runtime type check is made.
+    template <size_t I, typename... T>
+    TProtoMultiOutput<T...> InitProtobufsVariant(google::protobuf::Message* ptr) {
+        static_assert(std::conjunction_v<std::is_base_of<google::protobuf::Message, T>...>);
+
+        using TTypedPtr = TProtoOutput<I, T...>;
+        TTypedPtr typed = static_cast<TTypedPtr>(ptr);
+
+        return TProtoMultiOutput<T...>(std::in_place_index<I>, typed);
+    }
+
+    // Shared base of the mapping stream/consumer: owns a table with one variant-building
+    // function per type in T..., stored in pack order so that entry I builds alternative I.
+    template <typename... T>
+    class TProtobufsMappingBase {
+    private:
+        using TInitFunc = TProtoMultiOutput<T...> (*)(google::protobuf::Message*);
+        using TInitFuncs = std::array<TInitFunc, sizeof...(T)>;
+
+        // Expands to {&InitProtobufsVariant<0, T...>, &InitProtobufsVariant<1, T...>, ...}.
+        template <size_t... I>
+        static TInitFuncs BuildInitFuncs(std::index_sequence<I...>) {
+            return {&InitProtobufsVariant<I, T...>...};
+        }
+
+    public:
+        TProtobufsMappingBase()
+            : InitFuncs_(BuildInitFuncs(std::index_sequence_for<T...>()))
+        {
+        }
+
+    protected:
+        // Indexed by the `first` field of TProtoRawMultiOutput in the derived classes.
+        const TInitFuncs InitFuncs_;
+    };
+
+    // Stream adapter: pulls (index, untyped message) pairs from the wrapped raw stream and
+    // hands them out as typed variants.
+    template <typename... T>
+    class TProtobufsMappingStream: public IStream<TProtoMultiOutput<T...>>, public TProtobufsMappingBase<T...> {
+    public:
+        // Takes ownership of the raw stream being adapted.
+        TProtobufsMappingStream(THolder<IStream<TProtoRawMultiOutput>> oldStream)
+            : OldStream_(std::move(oldStream))
+        {
+        }
+
+        // Fetches the next raw item and converts it with the builder selected by its index.
+        // NOTE(review): the index is used as-is, with no range check against sizeof...(T).
+        TProtoMultiOutput<T...> Fetch() override {
+            const auto [alternative, message] = OldStream_->Fetch();
+            return this->InitFuncs_[alternative](message);
+        }
+
+    private:
+        THolder<IStream<TProtoRawMultiOutput>> OldStream_;
+    };
+
+    // Consumer adapter: accepts (index, untyped message) pairs and forwards them to the
+    // wrapped consumer as typed variants.
+    template <typename... T>
+    class TProtobufsMappingConsumer: public IConsumer<TProtoRawMultiOutput>, public TProtobufsMappingBase<T...> {
+    public:
+        // Takes ownership of the typed consumer that receives the converted items.
+        TProtobufsMappingConsumer(THolder<IConsumer<TProtoMultiOutput<T...>>> oldConsumer)
+            : OldConsumer_(std::move(oldConsumer))
+        {
+        }
+
+        // Converts the raw item with the builder selected by its index and passes it on.
+        // NOTE(review): the index is used as-is, with no range check against sizeof...(T).
+        void OnObject(TProtoRawMultiOutput oldItem) override {
+            const auto toVariant = this->InitFuncs_[oldItem.first];
+            OldConsumer_->OnObject(toVariant(oldItem.second));
+        }
+
+        // End-of-data is forwarded unchanged.
+        void OnFinish() override {
+            OldConsumer_->OnFinish();
+        }
+
+    private:
+        THolder<IConsumer<TProtoMultiOutput<T...>>> OldConsumer_;
+    };
+}
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.cpp b/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.cpp
new file mode 100644
index 0000000000..91de6c290a
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.cpp
@@ -0,0 +1 @@
+#include "spec.h"
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h b/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h
new file mode 100644
index 0000000000..53a4a2f96e
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h
@@ -0,0 +1,147 @@
+#pragma once
+
+#include "proto_variant.h"
+
+#include <ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.h>
+
+namespace NYql {
+ namespace NPureCalc {
+ /**
+  * Input spec for a concrete (non-raw) protobuf message type.
+  *
+  * Passes the descriptor of T, the optional timestamp column and the schema
+  * options on to TProtobufRawInputSpec.
+  *
+  * @tparam T message type; must derive from google::protobuf::Message.
+  */
+ template <typename T>
+ class TProtobufInputSpec: public TProtobufRawInputSpec {
+     static_assert(std::is_base_of_v<google::protobuf::Message, T>,
+                   "should be derived from google::protobuf::Message");
+
+ public:
+     TProtobufInputSpec(
+         const TMaybe<TString>& timestampColumn = Nothing(),
+         const TProtoSchemaOptions& options = {})
+         : TProtobufRawInputSpec(*T::descriptor(), timestampColumn, options)
+     {
+     }
+ };
+
+ /**
+  * Output spec for a concrete (non-raw) protobuf message type.
+  *
+  * Passes the descriptor of T, a null second argument, the schema options and
+  * the arena pointer on to TProtobufRawOutputSpec.
+  *
+  * @tparam T message type; must derive from google::protobuf::Message.
+  */
+ template <typename T>
+ class TProtobufOutputSpec: public TProtobufRawOutputSpec {
+     static_assert(std::is_base_of_v<google::protobuf::Message, T>,
+                   "should be derived from google::protobuf::Message");
+
+ public:
+     TProtobufOutputSpec(
+         const TProtoSchemaOptions& options = {},
+         google::protobuf::Arena* arena = nullptr)
+         : TProtobufRawOutputSpec(*T::descriptor(), nullptr, options, arena)
+     {
+     }
+ };
+
+ /**
+  * Output spec for several concrete (non-raw) protobuf message types at once.
+  *
+  * Passes the descriptors of all T..., an empty second argument, the schema
+  * options and the optional arenas on to TProtobufRawMultiOutputSpec.
+  *
+  * @tparam T message types; each must derive from google::protobuf::Message.
+  */
+ template <typename... T>
+ class TProtobufMultiOutputSpec: public TProtobufRawMultiOutputSpec {
+     static_assert(
+         (std::is_base_of_v<google::protobuf::Message, T> && ...),
+         "all types should be derived from google::protobuf::Message");
+
+ public:
+     TProtobufMultiOutputSpec(
+         const TProtoSchemaOptions& options = {},
+         TMaybe<TVector<google::protobuf::Arena*>> arenas = {})
+         : TProtobufRawMultiOutputSpec({T::descriptor()...}, Nothing(), options, std::move(arenas))
+     {
+     }
+ };
+
+ /**
+  * Input traits for TProtobufInputSpec<T>.
+  *
+  * Every operation converts between T* and google::protobuf::Message* items and
+  * delegates to the traits of TProtobufRawInputSpec.
+  */
+ template <typename T>
+ struct TInputSpecTraits<TProtobufInputSpec<T>> {
+     static constexpr bool IsPartial = false;
+
+     static constexpr bool SupportPullStreamMode = true;
+     static constexpr bool SupportPullListMode = true;
+     static constexpr bool SupportPushStreamMode = true;
+
+     using TConsumerType = THolder<IConsumer<T*>>;
+
+     static void PreparePullStreamWorker(const TProtobufInputSpec<T>& inputSpec, IPullStreamWorker* worker, THolder<IStream<T*>> stream) {
+         using TRawTraits = TInputSpecTraits<TProtobufRawInputSpec>;
+         TRawTraits::PreparePullStreamWorker(
+             inputSpec, worker, ConvertStream<google::protobuf::Message*>(std::move(stream)));
+     }
+
+     static void PreparePullListWorker(const TProtobufInputSpec<T>& inputSpec, IPullListWorker* worker, THolder<IStream<T*>> stream) {
+         using TRawTraits = TInputSpecTraits<TProtobufRawInputSpec>;
+         TRawTraits::PreparePullListWorker(
+             inputSpec, worker, ConvertStream<google::protobuf::Message*>(std::move(stream)));
+     }
+
+     static TConsumerType MakeConsumer(const TProtobufInputSpec<T>& inputSpec, TWorkerHolder<IPushStreamWorker> worker) {
+         using TRawTraits = TInputSpecTraits<TProtobufRawInputSpec>;
+         return ConvertConsumer<T*>(TRawTraits::MakeConsumer(inputSpec, std::move(worker)));
+     }
+ };
+
+ /**
+  * Output traits for TProtobufOutputSpec<T>.
+  *
+  * Every operation delegates to the traits of TProtobufRawOutputSpec and uses
+  * the *Unsafe converters to switch between google::protobuf::Message* and T* items.
+  */
+ template <typename T>
+ struct TOutputSpecTraits<TProtobufOutputSpec<T>> {
+     static constexpr bool IsPartial = false;
+
+     static constexpr bool SupportPullStreamMode = true;
+     static constexpr bool SupportPullListMode = true;
+     static constexpr bool SupportPushStreamMode = true;
+
+     using TOutputItemType = T*;
+     using TPullStreamReturnType = THolder<IStream<TOutputItemType>>;
+     using TPullListReturnType = THolder<IStream<TOutputItemType>>;
+
+     static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const TProtobufOutputSpec<T>& outputSpec, TWorkerHolder<IPullStreamWorker> worker) {
+         using TRawTraits = TOutputSpecTraits<TProtobufRawOutputSpec>;
+         return ConvertStreamUnsafe<TOutputItemType>(
+             TRawTraits::ConvertPullStreamWorkerToOutputType(outputSpec, std::move(worker)));
+     }
+
+     static TPullListReturnType ConvertPullListWorkerToOutputType(const TProtobufOutputSpec<T>& outputSpec, TWorkerHolder<IPullListWorker> worker) {
+         using TRawTraits = TOutputSpecTraits<TProtobufRawOutputSpec>;
+         return ConvertStreamUnsafe<TOutputItemType>(
+             TRawTraits::ConvertPullListWorkerToOutputType(outputSpec, std::move(worker)));
+     }
+
+     static void SetConsumerToWorker(const TProtobufOutputSpec<T>& outputSpec, IPushStreamWorker* worker, THolder<IConsumer<T*>> consumer) {
+         using TRawTraits = TOutputSpecTraits<TProtobufRawOutputSpec>;
+         TRawTraits::SetConsumerToWorker(
+             outputSpec, worker, ConvertConsumerUnsafe<google::protobuf::Message*>(std::move(consumer)));
+     }
+ };
+
+ /**
+  * Output traits for TProtobufMultiOutputSpec<T...>.
+  *
+  * Delegates to the traits of TProtobufRawMultiOutputSpec and wraps the raw
+  * stream or consumer in the NPrivate mapping adapters.
+  */
+ template <typename... T>
+ struct TOutputSpecTraits<TProtobufMultiOutputSpec<T...>> {
+     static constexpr bool IsPartial = false;
+
+     static constexpr bool SupportPullStreamMode = true;
+     static constexpr bool SupportPullListMode = true;
+     static constexpr bool SupportPushStreamMode = true;
+
+     using TOutputItemType = std::variant<T*...>;
+     using TPullStreamReturnType = THolder<IStream<TOutputItemType>>;
+     using TPullListReturnType = THolder<IStream<TOutputItemType>>;
+
+     static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const TProtobufMultiOutputSpec<T...>& outputSpec, TWorkerHolder<IPullStreamWorker> worker) {
+         using TRawTraits = TOutputSpecTraits<TProtobufRawMultiOutputSpec>;
+         auto rawStream = TRawTraits::ConvertPullStreamWorkerToOutputType(outputSpec, std::move(worker));
+         return MakeHolder<NPrivate::TProtobufsMappingStream<T...>>(std::move(rawStream));
+     }
+
+     static TPullListReturnType ConvertPullListWorkerToOutputType(const TProtobufMultiOutputSpec<T...>& outputSpec, TWorkerHolder<IPullListWorker> worker) {
+         using TRawTraits = TOutputSpecTraits<TProtobufRawMultiOutputSpec>;
+         auto rawStream = TRawTraits::ConvertPullListWorkerToOutputType(outputSpec, std::move(worker));
+         return MakeHolder<NPrivate::TProtobufsMappingStream<T...>>(std::move(rawStream));
+     }
+
+     static void SetConsumerToWorker(const TProtobufMultiOutputSpec<T...>& outputSpec, IPushStreamWorker* worker, THolder<IConsumer<TOutputItemType>> consumer) {
+         using TRawTraits = TOutputSpecTraits<TProtobufRawMultiOutputSpec>;
+         TRawTraits::SetConsumerToWorker(
+             outputSpec,
+             worker,
+             MakeHolder<NPrivate::TProtobufsMappingConsumer<T...>>(std::move(consumer)));
+     }
+ };
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..b5599ef496
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,71 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# Unit-test executable; its sources are attached by target_sources() below.
+add_executable(yql-public-purecalc-io_specs-protobuf-ut)
+# Define USE_CURRENT_UDF_ABI_VERSION when compiling this target only (PRIVATE).
+target_compile_options(yql-public-purecalc-io_specs-protobuf-ut PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+# Libraries the test links against.
+target_link_libraries(yql-public-purecalc-io_specs-protobuf-ut PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ cpp-testing-unittest_main
+ cpp-protobuf-util
+ udf-service-exception_policy
+ public-purecalc-common
+ purecalc-io_specs-protobuf
+ purecalc-ut-protos
+)
+# macOS linker flags. `-framework` and `CoreFoundation` are two separate list
+# entries that must stay adjacent and in this order.
+# NOTE(review): -fPIC is listed twice, as emitted by the generator.
+target_link_options(yql-public-purecalc-io_specs-protobuf-ut PRIVATE
+ -Wl,-platform_version,macos,11.0,11.0
+ -fPIC
+ -fPIC
+ -framework
+ CoreFoundation
+)
+# The single test source, addressed from the top of the source tree.
+target_sources(yql-public-purecalc-io_specs-protobuf-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/test_spec.cpp
+)
+# SPLIT_FACTOR is a project-specific target property, set to 1 here
+# (presumably read by add_yunittest — TODO confirm).
+set_property(
+ TARGET
+ yql-public-purecalc-io_specs-protobuf-ut
+ PROPERTY
+ SPLIT_FACTOR
+ 1
+)
+# Register the executable as a test via the project helper add_yunittest,
+# passing the listed command-line arguments.
+add_yunittest(
+ NAME
+ yql-public-purecalc-io_specs-protobuf-ut
+ TEST_TARGET
+ yql-public-purecalc-io_specs-protobuf-ut
+ TEST_ARG
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+# Set the test's LABELS property to MEDIUM.
+set_yunittest_property(
+ TEST
+ yql-public-purecalc-io_specs-protobuf-ut
+ PROPERTY
+ LABELS
+ MEDIUM
+)
+# Set the test's PROCESSORS property to 1.
+set_yunittest_property(
+ TEST
+ yql-public-purecalc-io_specs-protobuf-ut
+ PROPERTY
+ PROCESSORS
+ 1
+)
+# Project helper; selects system_allocator for this target.
+target_allocator(yql-public-purecalc-io_specs-protobuf-ut
+ system_allocator
+)
+# Project helper (presumably embeds version-control info — TODO confirm).
+vcs_info(yql-public-purecalc-io_specs-protobuf-ut)
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..2ff8d5a7fc
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,74 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# Unit-test executable; its sources are attached by target_sources() below.
+add_executable(yql-public-purecalc-io_specs-protobuf-ut)
+# Define USE_CURRENT_UDF_ABI_VERSION when compiling this target only (PRIVATE).
+target_compile_options(yql-public-purecalc-io_specs-protobuf-ut PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+# Libraries the test links against (no library-cpp-cpuid_check in this aarch64 variant).
+target_link_libraries(yql-public-purecalc-io_specs-protobuf-ut PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ cpp-testing-unittest_main
+ cpp-protobuf-util
+ udf-service-exception_policy
+ public-purecalc-common
+ purecalc-io_specs-protobuf
+ purecalc-ut-protos
+)
+# Linux linker flags.
+# NOTE(review): -ldl, -lrt and -fPIC each appear twice, as emitted by the generator.
+target_link_options(yql-public-purecalc-io_specs-protobuf-ut PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -fPIC
+ -lpthread
+ -lrt
+ -ldl
+)
+# The single test source, addressed from the top of the source tree.
+target_sources(yql-public-purecalc-io_specs-protobuf-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/test_spec.cpp
+)
+# SPLIT_FACTOR is a project-specific target property, set to 1 here
+# (presumably read by add_yunittest — TODO confirm).
+set_property(
+ TARGET
+ yql-public-purecalc-io_specs-protobuf-ut
+ PROPERTY
+ SPLIT_FACTOR
+ 1
+)
+# Register the executable as a test via the project helper add_yunittest,
+# passing the listed command-line arguments.
+add_yunittest(
+ NAME
+ yql-public-purecalc-io_specs-protobuf-ut
+ TEST_TARGET
+ yql-public-purecalc-io_specs-protobuf-ut
+ TEST_ARG
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+# Set the test's LABELS property to MEDIUM.
+set_yunittest_property(
+ TEST
+ yql-public-purecalc-io_specs-protobuf-ut
+ PROPERTY
+ LABELS
+ MEDIUM
+)
+# Set the test's PROCESSORS property to 1.
+set_yunittest_property(
+ TEST
+ yql-public-purecalc-io_specs-protobuf-ut
+ PROPERTY
+ PROCESSORS
+ 1
+)
+# Project helper; selects cpp-malloc-jemalloc for this target.
+target_allocator(yql-public-purecalc-io_specs-protobuf-ut
+ cpp-malloc-jemalloc
+)
+# Project helper (presumably embeds version-control info — TODO confirm).
+vcs_info(yql-public-purecalc-io_specs-protobuf-ut)
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..2defc0237f
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,76 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# Unit-test executable; its sources are attached by target_sources() below.
+add_executable(yql-public-purecalc-io_specs-protobuf-ut)
+# Define USE_CURRENT_UDF_ABI_VERSION when compiling this target only (PRIVATE).
+target_compile_options(yql-public-purecalc-io_specs-protobuf-ut PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+# Libraries the test links against.
+target_link_libraries(yql-public-purecalc-io_specs-protobuf-ut PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ cpp-testing-unittest_main
+ cpp-protobuf-util
+ udf-service-exception_policy
+ public-purecalc-common
+ purecalc-io_specs-protobuf
+ purecalc-ut-protos
+)
+# Linux linker flags.
+# NOTE(review): -ldl, -lrt and -fPIC each appear twice, as emitted by the generator.
+target_link_options(yql-public-purecalc-io_specs-protobuf-ut PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -fPIC
+ -lpthread
+ -lrt
+ -ldl
+)
+# The single test source, addressed from the top of the source tree.
+target_sources(yql-public-purecalc-io_specs-protobuf-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/test_spec.cpp
+)
+# SPLIT_FACTOR is a project-specific target property, set to 1 here
+# (presumably read by add_yunittest — TODO confirm).
+set_property(
+ TARGET
+ yql-public-purecalc-io_specs-protobuf-ut
+ PROPERTY
+ SPLIT_FACTOR
+ 1
+)
+# Register the executable as a test via the project helper add_yunittest,
+# passing the listed command-line arguments.
+add_yunittest(
+ NAME
+ yql-public-purecalc-io_specs-protobuf-ut
+ TEST_TARGET
+ yql-public-purecalc-io_specs-protobuf-ut
+ TEST_ARG
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+# Set the test's LABELS property to MEDIUM.
+set_yunittest_property(
+ TEST
+ yql-public-purecalc-io_specs-protobuf-ut
+ PROPERTY
+ LABELS
+ MEDIUM
+)
+# Set the test's PROCESSORS property to 1.
+set_yunittest_property(
+ TEST
+ yql-public-purecalc-io_specs-protobuf-ut
+ PROPERTY
+ PROCESSORS
+ 1
+)
+# Project helper; selects cpp-malloc-tcmalloc together with libs-tcmalloc-no_percpu_cache.
+target_allocator(yql-public-purecalc-io_specs-protobuf-ut
+ cpp-malloc-tcmalloc
+ libs-tcmalloc-no_percpu_cache
+)
+# Project helper (presumably embeds version-control info — TODO confirm).
+vcs_info(yql-public-purecalc-io_specs-protobuf-ut)
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Include exactly one per-platform file, chosen by system name and processor.
+# All branches except Darwin additionally require HAVE_CUDA to be false.
+# NOTE(review): there is no else() branch, so on any other platform nothing is
+# included and no target is defined here.
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..4e2687f660
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,64 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# Unit-test executable; its sources are attached by target_sources() below.
+add_executable(yql-public-purecalc-io_specs-protobuf-ut)
+# Define USE_CURRENT_UDF_ABI_VERSION when compiling this target only (PRIVATE).
+target_compile_options(yql-public-purecalc-io_specs-protobuf-ut PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+# Libraries the test links against. This Windows variant has no
+# target_link_options() call.
+target_link_libraries(yql-public-purecalc-io_specs-protobuf-ut PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ cpp-testing-unittest_main
+ cpp-protobuf-util
+ udf-service-exception_policy
+ public-purecalc-common
+ purecalc-io_specs-protobuf
+ purecalc-ut-protos
+)
+# The single test source, addressed from the top of the source tree.
+target_sources(yql-public-purecalc-io_specs-protobuf-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/test_spec.cpp
+)
+# SPLIT_FACTOR is a project-specific target property, set to 1 here
+# (presumably read by add_yunittest — TODO confirm).
+set_property(
+ TARGET
+ yql-public-purecalc-io_specs-protobuf-ut
+ PROPERTY
+ SPLIT_FACTOR
+ 1
+)
+# Register the executable as a test via the project helper add_yunittest,
+# passing the listed command-line arguments.
+add_yunittest(
+ NAME
+ yql-public-purecalc-io_specs-protobuf-ut
+ TEST_TARGET
+ yql-public-purecalc-io_specs-protobuf-ut
+ TEST_ARG
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+# Set the test's LABELS property to MEDIUM.
+set_yunittest_property(
+ TEST
+ yql-public-purecalc-io_specs-protobuf-ut
+ PROPERTY
+ LABELS
+ MEDIUM
+)
+# Set the test's PROCESSORS property to 1.
+set_yunittest_property(
+ TEST
+ yql-public-purecalc-io_specs-protobuf-ut
+ PROPERTY
+ PROCESSORS
+ 1
+)
+# Project helper; selects system_allocator for this target.
+target_allocator(yql-public-purecalc-io_specs-protobuf-ut
+ system_allocator
+)
+# Project helper (presumably embeds version-control info — TODO confirm).
+vcs_info(yql-public-purecalc-io_specs-protobuf-ut)
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/test_spec.cpp b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/test_spec.cpp
new file mode 100644
index 0000000000..384e617016
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/test_spec.cpp
@@ -0,0 +1,995 @@
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <ydb/library/yql/public/purecalc/common/interface.h>
+#include <ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h>
+#include <ydb/library/yql/public/purecalc/ut/protos/test_structs.pb.h>
+#include <library/cpp/protobuf/util/pb_io.h>
+#include <util/generic/xrange.h>
+
+namespace {
+ // Storage for the message returned by GetCanonicalMessage(); empty until the first call.
+ TMaybe<NPureCalcProto::TAllTypes> allTypesMessage;
+
+ // Returns a reference to the shared message, populating the fields below on first use.
+ NPureCalcProto::TAllTypes& GetCanonicalMessage() {
+     if (allTypesMessage) {
+         return allTypesMessage.GetRef();
+     }
+
+     allTypesMessage = NPureCalcProto::TAllTypes();
+     auto& message = allTypesMessage.GetRef();
+
+     message.SetFDouble(1);
+     message.SetFFloat(2);
+     message.SetFInt64(3);
+     message.SetFSfixed64(4);
+     message.SetFSint64(5);
+     message.SetFUint64(6);
+     message.SetFFixed64(7);
+     message.SetFInt32(8);
+     message.SetFSfixed32(9);
+     message.SetFSint32(10);
+     message.SetFUint32(11);
+     message.SetFFixed32(12);
+     message.SetFBool(true);
+     message.SetFString("asd");
+     message.SetFBytes("dsa");
+
+     return message;
+ }
+
+ // Compares `got` with `expected` field by field through their getters; each
+ // field is checked with its own UNIT_ASSERT_EQUAL.
+ // T1 and T2 may differ as long as both provide the getters used below.
+ template <typename T1, typename T2>
+ void AssertEqualToCanonical(const T1& got, const T2& expected) {
+ UNIT_ASSERT_EQUAL(expected.GetFDouble(), got.GetFDouble());
+ UNIT_ASSERT_EQUAL(expected.GetFFloat(), got.GetFFloat());
+ UNIT_ASSERT_EQUAL(expected.GetFInt64(), got.GetFInt64());
+ UNIT_ASSERT_EQUAL(expected.GetFSfixed64(), got.GetFSfixed64());
+ UNIT_ASSERT_EQUAL(expected.GetFSint64(), got.GetFSint64());
+ UNIT_ASSERT_EQUAL(expected.GetFUint64(), got.GetFUint64());
+ UNIT_ASSERT_EQUAL(expected.GetFFixed64(), got.GetFFixed64());
+ UNIT_ASSERT_EQUAL(expected.GetFInt32(), got.GetFInt32());
+ UNIT_ASSERT_EQUAL(expected.GetFSfixed32(), got.GetFSfixed32());
+ UNIT_ASSERT_EQUAL(expected.GetFSint32(), got.GetFSint32());
+ UNIT_ASSERT_EQUAL(expected.GetFUint32(), got.GetFUint32());
+ UNIT_ASSERT_EQUAL(expected.GetFFixed32(), got.GetFFixed32());
+ UNIT_ASSERT_EQUAL(expected.GetFBool(), got.GetFBool());
+ UNIT_ASSERT_EQUAL(expected.GetFString(), got.GetFString());
+ UNIT_ASSERT_EQUAL(expected.GetFBytes(), got.GetFBytes());
+ }
+
+ // Convenience overload: compares `got` against GetCanonicalMessage().
+ template <typename T>
+ void AssertEqualToCanonical(const T& got) {
+ AssertEqualToCanonical(got, GetCanonicalMessage());
+ }
+
+ // Writes `message` through SerializeToTextFormat into a string and returns that string.
+ TString SerializeToTextFormatAsString(const google::protobuf::Message& message) {
+     TString text;
+     {
+         // Scoped so the output stream is destroyed before `text` is returned.
+         TStringOutput sink(text);
+         SerializeToTextFormat(message, sink);
+     }
+     return text;
+ }
+
+ // Asserts that `actual` and `expected` produce the same string when passed
+ // through SerializeToTextFormatAsString.
+ template <typename T>
+ void AssertProtoEqual(const T& actual, const T& expected) {
+ UNIT_ASSERT_VALUES_EQUAL(SerializeToTextFormatAsString(actual), SerializeToTextFormatAsString(expected));
+ }
+}
+
+// Stream that yields a copy of the canonical message once and nullptr afterwards.
+class TAllTypesStreamImpl: public NYql::NPureCalc::IStream<NPureCalcProto::TAllTypes*> {
+private:
+    // Number of messages handed out so far.
+    int I_ = 0;
+    NPureCalcProto::TAllTypes Message_ = GetCanonicalMessage();
+
+public:
+    NPureCalcProto::TAllTypes* Fetch() override {
+        if (I_ <= 0) {
+            I_ += 1;
+            return &Message_;
+        }
+        return nullptr;
+    }
+};
+
+// Stream that yields one TSimpleMessage whose X is `value`, then nullptr.
+class TSimpleMessageStreamImpl: public NYql::NPureCalc::IStream<NPureCalcProto::TSimpleMessage*> {
+public:
+    TSimpleMessageStreamImpl(i32 value)
+    {
+        Message_.SetX(value);
+    }
+
+    NPureCalcProto::TSimpleMessage* Fetch() override {
+        if (!Exhausted_) {
+            Exhausted_ = true;
+            return &Message_;
+        }
+        return nullptr;
+    }
+
+private:
+    NPureCalcProto::TSimpleMessage Message_;
+    // Set once the single message has been returned.
+    bool Exhausted_ = false;
+};
+
+// Consumer that checks every received message against the canonical one and
+// requires at least one message before OnFinish().
+class TAllTypesConsumerImpl: public NYql::NPureCalc::IConsumer<NPureCalcProto::TAllTypes*> {
+private:
+    // Number of messages received so far.
+    int I_ = 0;
+
+public:
+    void OnObject(NPureCalcProto::TAllTypes* t) override {
+        ++I_;
+        AssertEqualToCanonical(*t);
+    }
+
+    void OnFinish() override {
+        UNIT_ASSERT(I_ > 0);
+    }
+};
+
+// Stream that yields three messages whose X is "-" repeated 0, 1 and 2 times,
+// reusing one message object, and nullptr afterwards.
+class TStringMessageStreamImpl: public NYql::NPureCalc::IStream<NPureCalcProto::TStringMessage*> {
+private:
+    // Number of messages handed out so far.
+    int I_ = 0;
+    NPureCalcProto::TStringMessage Message_{};
+
+public:
+    NPureCalcProto::TStringMessage* Fetch() override {
+        if (I_ < 3) {
+            Message_.SetX(TString("-") * I_);
+            I_ += 1;
+            return &Message_;
+        }
+        return nullptr;
+    }
+};
+
+// Consumer that appends each message's X to an external vector and appends
+// -100 when the input is finished.
+class TSimpleMessageConsumerImpl: public NYql::NPureCalc::IConsumer<NPureCalcProto::TSimpleMessage*> {
+private:
+    // Not owned; supplied by the caller.
+    TVector<int>* Buf_;
+
+public:
+    TSimpleMessageConsumerImpl(TVector<int>* buf)
+        : Buf_(buf)
+    {
+    }
+
+public:
+    void OnObject(NPureCalcProto::TSimpleMessage* t) override {
+        auto& sink = *Buf_;
+        sink.push_back(t->GetX());
+    }
+
+    void OnFinish() override {
+        auto& sink = *Buf_;
+        sink.push_back(-100);
+    }
+};
+
+using TMessagesVariant = std::variant<NPureCalcProto::TSplitted1*, NPureCalcProto::TSplitted2*, NPureCalcProto::TStringMessage*>;
+
+// Consumer for a three-alternative variant of message pointers: routes every
+// value into the queue matching its alternative and writes 42 to *Value_ on finish.
+class TVariantConsumerImpl: public NYql::NPureCalc::IConsumer<TMessagesVariant> {
+public:
+    using TType0 = TVector<std::pair<i32, TString>>;
+    using TType1 = TVector<std::pair<ui32, TString>>;
+    using TType2 = TVector<TString>;
+
+public:
+    TVariantConsumerImpl(TType0* q0, TType1* q1, TType2* q2, int* v)
+        : Queue0_(q0)
+        , Queue1_(q1)
+        , Queue2_(q2)
+        , Value_(v)
+    {
+    }
+
+    void OnObject(TMessagesVariant value) override {
+        // String fields are moved out of the message rather than copied.
+        switch (value.index()) {
+            case 0: {
+                auto* msg = std::get<0>(value);
+                Queue0_->push_back({msg->GetBInt(), std::move(*msg->MutableBString())});
+                break;
+            }
+            case 1: {
+                auto* msg = std::get<1>(value);
+                Queue1_->push_back({msg->GetCUint(), std::move(*msg->MutableCString())});
+                break;
+            }
+            case 2: {
+                auto* msg = std::get<2>(value);
+                Queue2_->push_back(std::move(*msg->MutableX()));
+                break;
+            }
+            default:
+                Y_FAIL("invalid variant alternative");
+        }
+    }
+
+    void OnFinish() override {
+        *Value_ = 42;
+    }
+
+private:
+    // None of these pointers are owned.
+    TType0* Queue0_;
+    TType1* Queue1_;
+    TType2* Queue2_;
+    int* Value_;
+};
+
+// Stream that yields the same TUnsplitted object three times: first with ABool
+// untouched, then with ABool = false, then with ABool = true; nullptr afterwards.
+class TUnsplittedStreamImpl: public NYql::NPureCalc::IStream<NPureCalcProto::TUnsplitted*> {
+public:
+    TUnsplittedStreamImpl()
+    {
+        Message_.SetAInt(-23);
+        Message_.SetAUint(111);
+        Message_.SetAString("Hello!");
+    }
+
+public:
+    NPureCalcProto::TUnsplitted* Fetch() override {
+        if (I_ > 2) {
+            return nullptr;
+        }
+
+        if (I_ == 1) {
+            Message_.SetABool(false);
+        } else if (I_ == 2) {
+            Message_.SetABool(true);
+        }
+
+        ++I_;
+        return &Message_;
+    }
+
+private:
+    NPureCalcProto::TUnsplitted Message_;
+    // Number of messages handed out so far.
+    ui32 I_ = 0;
+};
+
+// Consumer that stores a copy of every received object in Data.
+template <typename T>
+struct TVectorConsumer: public NYql::NPureCalc::IConsumer<T*> {
+    TVector<T> Data;
+
+    void OnObject(T* t) override {
+        const T& received = *t;
+        Data.push_back(received);
+    }
+
+    // Nothing to do at end of input.
+    void OnFinish() override {
+    }
+};
+
+// Stream that walks over Data and returns a pointer to each element in turn,
+// then nullptr once Index reaches Data.size().
+template <typename T>
+struct TVectorStream: public NYql::NPureCalc::IStream<T*> {
+    TVector<T> Data;
+    size_t Index = 0;
+
+public:
+    T* Fetch() override {
+        if (Index >= Data.size()) {
+            return nullptr;
+        }
+        T* current = &Data[Index];
+        ++Index;
+        return current;
+    }
+};
+
+Y_UNIT_TEST_SUITE(TestProtoIO) {
+ // Runs `SELECT * FROM Input` over the canonical TAllTypes message in pull-stream,
+ // pull-list and push-stream modes and checks the output equals the canonical message.
+ Y_UNIT_TEST(TestAllTypes) {
+ using namespace NYql::NPureCalc;
+
+ auto factory = MakeProgramFactory();
+
+ // Pull-stream mode: exactly one message is expected, then end of stream.
+ {
+ auto program = factory->MakePullStreamProgram(
+ TProtobufInputSpec<NPureCalcProto::TAllTypes>(),
+ TProtobufOutputSpec<NPureCalcProto::TAllTypes>(),
+ "SELECT * FROM Input",
+ ETranslationMode::SQL
+ );
+
+ auto stream = program->Apply(MakeHolder<TAllTypesStreamImpl>());
+
+ NPureCalcProto::TAllTypes* message;
+
+ UNIT_ASSERT(message = stream->Fetch());
+ AssertEqualToCanonical(*message);
+ UNIT_ASSERT(!stream->Fetch());
+ }
+
+ // Pull-list mode: same expectations as above.
+ {
+ auto program = factory->MakePullListProgram(
+ TProtobufInputSpec<NPureCalcProto::TAllTypes>(),
+ TProtobufOutputSpec<NPureCalcProto::TAllTypes>(),
+ "SELECT * FROM Input",
+ ETranslationMode::SQL
+ );
+
+ auto stream = program->Apply(MakeHolder<TAllTypesStreamImpl>());
+
+ NPureCalcProto::TAllTypes* message;
+
+ UNIT_ASSERT(message = stream->Fetch());
+ AssertEqualToCanonical(*message);
+ UNIT_ASSERT(!stream->Fetch());
+ }
+
+ // Push-stream mode: the output checks live in TAllTypesConsumerImpl.
+ {
+ auto program = factory->MakePushStreamProgram(
+ TProtobufInputSpec<NPureCalcProto::TAllTypes>(),
+ TProtobufOutputSpec<NPureCalcProto::TAllTypes>(),
+ "SELECT * FROM Input",
+ ETranslationMode::SQL
+ );
+
+ auto consumer = program->Apply(MakeHolder<TAllTypesConsumerImpl>());
+
+ UNIT_ASSERT_NO_EXCEPTION([&](){ consumer->OnObject(&GetCanonicalMessage()); }());
+ UNIT_ASSERT_NO_EXCEPTION([&](){ consumer->OnFinish(); }());
+ }
+ }
+
+ // Runs `SELECT * FROM Input` over `testInput` in push-stream, pull-stream and
+ // pull-list modes and checks that the output serializes to the same text as the input.
+ // `arena` is passed to the output spec; in the pull modes the result message's
+ // arena is also compared with it, and a non-null arena is reset afterwards.
+ template <typename T>
+ void CheckPassThroughYql(T& testInput, google::protobuf::Arena* arena = nullptr) {
+ using namespace NYql::NPureCalc;
+
+ auto factory = MakeProgramFactory();
+
+ // Push-stream mode; the same input is pushed twice through one program.
+ {
+ auto program = factory->MakePushStreamProgram(
+ TProtobufInputSpec<T>(),
+ TProtobufOutputSpec<T>({}, arena),
+ "SELECT * FROM Input",
+ ETranslationMode::SQL
+ );
+
+ auto resultConsumer = MakeHolder<TVectorConsumer<T>>();
+ // Keep a raw pointer: ownership of the consumer moves into Apply().
+ auto* resultConsumerPtr = resultConsumer.Get();
+ auto sourceConsumer = program->Apply(std::move(resultConsumer));
+
+ sourceConsumer->OnObject(&testInput);
+ UNIT_ASSERT_VALUES_EQUAL(1, resultConsumerPtr->Data.size());
+ AssertProtoEqual(resultConsumerPtr->Data[0], testInput);
+
+ resultConsumerPtr->Data.clear();
+ sourceConsumer->OnObject(&testInput);
+ UNIT_ASSERT_VALUES_EQUAL(1, resultConsumerPtr->Data.size());
+ AssertProtoEqual(resultConsumerPtr->Data[0], testInput);
+ }
+
+ // Pull-stream mode.
+ {
+ auto program = factory->MakePullStreamProgram(
+ TProtobufInputSpec<T>(),
+ TProtobufOutputSpec<T>({}, arena),
+ "SELECT * FROM Input",
+ ETranslationMode::SQL
+ );
+
+ auto sourceStream = MakeHolder<TVectorStream<T>>();
+ // Keep a raw pointer: ownership of the stream moves into Apply().
+ auto* sourceStreamPtr = sourceStream.Get();
+ auto resultStream = program->Apply(std::move(sourceStream));
+
+ // The input is added after Apply() but before the first Fetch().
+ sourceStreamPtr->Data.push_back(testInput);
+ T* resultMessage;
+ UNIT_ASSERT(resultMessage = resultStream->Fetch());
+ AssertProtoEqual(*resultMessage, testInput);
+ UNIT_ASSERT(!resultStream->Fetch());
+
+ UNIT_ASSERT_VALUES_EQUAL(resultMessage->GetArena(), arena);
+
+ if (arena != nullptr) {
+ arena->Reset();
+ }
+ }
+
+ // Pull-list mode; same steps as the pull-stream case.
+ {
+ auto program = factory->MakePullListProgram(
+ TProtobufInputSpec<T>(),
+ TProtobufOutputSpec<T>({}, arena),
+ "SELECT * FROM Input",
+ ETranslationMode::SQL
+ );
+
+ auto sourceStream = MakeHolder<TVectorStream<T>>();
+ auto* sourceStreamPtr = sourceStream.Get();
+ auto resultStream = program->Apply(std::move(sourceStream));
+
+ sourceStreamPtr->Data.push_back(testInput);
+ T* resultMessage;
+ UNIT_ASSERT(resultMessage = resultStream->Fetch());
+ AssertProtoEqual(*resultMessage, testInput);
+ UNIT_ASSERT(!resultStream->Fetch());
+
+ UNIT_ASSERT_VALUES_EQUAL(resultMessage->GetArena(), arena);
+
+ if (arena != nullptr) {
+ arena->Reset();
+ }
+ }
+ }
+
+ // Asserts that creating a push-stream, pull-stream and pull-list program with
+ // T as both input and output type throws a yexception whose message contains
+ // `expectedExceptionMessage`.
+ template <typename T>
+ void CheckMessageIsInvalid(const TString& expectedExceptionMessage) {
+ using namespace NYql::NPureCalc;
+
+ auto factory = MakeProgramFactory();
+
+ UNIT_ASSERT_EXCEPTION_CONTAINS([&]() {
+ factory->MakePushStreamProgram(TProtobufInputSpec<T>(), TProtobufOutputSpec<T>(), "SELECT * FROM Input", ETranslationMode::SQL);
+ }(), yexception, expectedExceptionMessage);
+
+ UNIT_ASSERT_EXCEPTION_CONTAINS([&]() {
+ factory->MakePullStreamProgram(TProtobufInputSpec<T>(), TProtobufOutputSpec<T>(), "SELECT * FROM Input", ETranslationMode::SQL);
+ }(), yexception, expectedExceptionMessage);
+
+ UNIT_ASSERT_EXCEPTION_CONTAINS([&]() {
+ factory->MakePullListProgram(TProtobufInputSpec<T>(), TProtobufOutputSpec<T>(), "SELECT * FROM Input", ETranslationMode::SQL);
+ }(), yexception, expectedExceptionMessage);
+ }
+
+ // Pass-through check for a message with a scalar X and a nested message Y.
+ Y_UNIT_TEST(TestSimpleNested) {
+     NPureCalcProto::TSimpleNested message;
+     message.SetX(10);
+
+     // Y starts as a copy of the canonical message with FUint64 overridden.
+     auto& nested = *message.MutableY();
+     nested = GetCanonicalMessage();
+     nested.SetFUint64(100);
+
+     CheckPassThroughYql(message);
+ }
+
+ // Pass-through check for a message whose only populated field is the nested message X.
+ Y_UNIT_TEST(TestOptionalNested) {
+     NPureCalcProto::TOptionalNested message;
+
+     // X starts as a copy of the canonical message with FUint64 overridden.
+     auto& nested = *message.MutableX();
+     nested = GetCanonicalMessage();
+     nested.SetFUint64(100);
+
+     CheckPassThroughYql(message);
+ }
+
+ // Pass-through check for a message with a scalar X and three repeated Y values.
+ Y_UNIT_TEST(TestSimpleRepeated) {
+     NPureCalcProto::TSimpleRepeated message;
+     message.SetX(20);
+     for (int y : {100, 200, 300}) {
+         message.AddY(y);
+     }
+     CheckPassThroughYql(message);
+ }
+
+ // Pass-through check for a message with two repeated items, each carrying its
+ // own nested copy of the canonical message.
+ Y_UNIT_TEST(TestNestedRepeated) {
+     NPureCalcProto::TNestedRepeated message;
+     message.SetX(20);
+
+     // Appends one item with the given X whose Y is the canonical message with
+     // FUint64 replaced by `fUint64`.
+     auto appendItem = [&message](int x, int fUint64) {
+         auto* item = message.MutableY()->Add();
+         item->SetX(x);
+
+         auto* nested = item->MutableY();
+         *nested = GetCanonicalMessage();
+         nested->SetFUint64(fUint64);
+     };
+
+     appendItem(100, 1000);
+     appendItem(200, 2000);
+
+     CheckPassThroughYql(message);
+ }
+
+ // Pass-through check for a message with two repeated enum values.
+ Y_UNIT_TEST(TestMessageWithEnum) {
+     using TMessage = NPureCalcProto::TMessageWithEnum;
+
+     TMessage message;
+     message.AddEnumValue(TMessage::VALUE1);
+     message.AddEnumValue(TMessage::VALUE2);
+     CheckPassThroughYql(message);
+ }
+
+ Y_UNIT_TEST(TestRecursive) {
+ CheckMessageIsInvalid<NPureCalcProto::TRecursive>("NPureCalcProto.TRecursive->NPureCalcProto.TRecursive");
+ }
+
+ Y_UNIT_TEST(TestRecursiveIndirectly) {
+ CheckMessageIsInvalid<NPureCalcProto::TRecursiveIndirectly>(
+ "NPureCalcProto.TRecursiveIndirectly->NPureCalcProto.TRecursiveIndirectly.TNested->NPureCalcProto.TRecursiveIndirectly");
+ }
+
+ // Sets an output columns filter of three columns on the output spec and checks
+ // that GetUsedColumns() equals the filter and that the fetched message matches
+ // a message with only those three fields set.
+ Y_UNIT_TEST(TestColumnsFilter) {
+ using namespace NYql::NPureCalc;
+
+ auto factory = MakeProgramFactory();
+
+ auto filter = THashSet<TString>({"FFixed64", "FBool", "FBytes"});
+
+ // Expected output: only the filtered fields, copied from the canonical message.
+ NPureCalcProto::TOptionalAllTypes canonicalMessage;
+ canonicalMessage.SetFFixed64(GetCanonicalMessage().GetFFixed64());
+ canonicalMessage.SetFBool(GetCanonicalMessage().GetFBool());
+ canonicalMessage.SetFBytes(GetCanonicalMessage().GetFBytes());
+
+ {
+ auto inputSpec = TProtobufInputSpec<NPureCalcProto::TAllTypes>();
+ auto outputSpec = TProtobufOutputSpec<NPureCalcProto::TOptionalAllTypes>();
+ outputSpec.SetOutputColumnsFilter(filter);
+
+ auto program = factory->MakePullStreamProgram(
+ inputSpec,
+ outputSpec,
+ "SELECT * FROM Input",
+ ETranslationMode::SQL
+ );
+
+ UNIT_ASSERT_EQUAL(program->GetUsedColumns(), filter);
+
+ auto stream = program->Apply(MakeHolder<TAllTypesStreamImpl>());
+
+ NPureCalcProto::TOptionalAllTypes* message;
+
+ UNIT_ASSERT(message = stream->Fetch());
+ AssertEqualToCanonical(*message, canonicalMessage);
+ UNIT_ASSERT(!stream->Fetch());
+ }
+ }
+
+ // Selects three columns explicitly. With TOptionalAllTypes as output the program
+ // runs and GetUsedColumns() equals those columns; with TAllTypes as output,
+ // program creation must throw TCompileError containing "Failed to optimize".
+ Y_UNIT_TEST(TestColumnsFilterWithOptionalFields) {
+ using namespace NYql::NPureCalc;
+
+ auto factory = MakeProgramFactory();
+
+ auto fields = THashSet<TString>({"FFixed64", "FBool", "FBytes"});
+
+ // Expected output: only the selected fields, copied from the canonical message.
+ NPureCalcProto::TOptionalAllTypes canonicalMessage;
+ canonicalMessage.SetFFixed64(GetCanonicalMessage().GetFFixed64());
+ canonicalMessage.SetFBool(GetCanonicalMessage().GetFBool());
+ canonicalMessage.SetFBytes(GetCanonicalMessage().GetFBytes());
+
+ {
+ auto program = factory->MakePullStreamProgram(
+ TProtobufInputSpec<NPureCalcProto::TAllTypes>(),
+ TProtobufOutputSpec<NPureCalcProto::TOptionalAllTypes>(),
+ "SELECT FFixed64, FBool, FBytes FROM Input",
+ ETranslationMode::SQL
+ );
+
+ UNIT_ASSERT_EQUAL(program->GetUsedColumns(), fields);
+
+ auto stream = program->Apply(MakeHolder<TAllTypesStreamImpl>());
+
+ NPureCalcProto::TOptionalAllTypes* message;
+
+ UNIT_ASSERT(message = stream->Fetch());
+ AssertEqualToCanonical(*message, canonicalMessage);
+ UNIT_ASSERT(!stream->Fetch());
+ }
+
+ // Same query, but with TAllTypes as the output type.
+ UNIT_ASSERT_EXCEPTION_CONTAINS([&](){
+ factory->MakePullStreamProgram(
+ TProtobufInputSpec<NPureCalcProto::TAllTypes>(),
+ TProtobufOutputSpec<NPureCalcProto::TAllTypes>(),
+ "SELECT FFixed64, FBool, FBytes FROM Input",
+ ETranslationMode::SQL
+ );
+ }(), TCompileError, "Failed to optimize");
+ }
+
+ // Checks that for `SELECT * FROM Input` GetUsedColumns() equals the set of all
+ // field names taken from the TOptionalAllTypes descriptor.
+ Y_UNIT_TEST(TestUsedColumns) {
+ using namespace NYql::NPureCalc;
+
+ auto factory = MakeProgramFactory();
+
+ auto allFields = THashSet<TString>();
+
+ // Collect every field name of the output message type.
+ for (auto i: xrange(NPureCalcProto::TOptionalAllTypes::descriptor()->field_count())) {
+ allFields.emplace(NPureCalcProto::TOptionalAllTypes::descriptor()->field(i)->name());
+ }
+
+ {
+ auto program = factory->MakePullStreamProgram(
+ TProtobufInputSpec<NPureCalcProto::TAllTypes>(),
+ TProtobufOutputSpec<NPureCalcProto::TOptionalAllTypes>(),
+ "SELECT * FROM Input",
+ ETranslationMode::SQL
+ );
+
+ UNIT_ASSERT_EQUAL(program->GetUsedColumns(), allFields);
+ }
+ }
+
+ Y_UNIT_TEST(TestChaining) {
+ using namespace NYql::NPureCalc;
+
+ auto factory = MakeProgramFactory();
+
+ TString sql1 = "SELECT UNWRAP(X || CAST(\"HI\" AS Utf8)) AS X FROM Input";
+ TString sql2 = "SELECT LENGTH(X) AS X FROM Input";
+
+ {
+ auto program1 = factory->MakePullStreamProgram(
+ TProtobufInputSpec<NPureCalcProto::TStringMessage>(),
+ TProtobufOutputSpec<NPureCalcProto::TStringMessage>(),
+ sql1,
+ ETranslationMode::SQL
+ );
+
+ auto program2 = factory->MakePullStreamProgram(
+ TProtobufInputSpec<NPureCalcProto::TStringMessage>(),
+ TProtobufOutputSpec<NPureCalcProto::TSimpleMessage>(),
+ sql2,
+ ETranslationMode::SQL
+ );
+
+ auto input = MakeHolder<TStringMessageStreamImpl>();
+ auto intermediate = program1->Apply(std::move(input));
+ auto output = program2->Apply(std::move(intermediate));
+
+ TVector<int> expected = {2, 3, 4};
+ TVector<int> actual{};
+
+ while (auto *x = output->Fetch()) {
+ actual.push_back(x->GetX());
+ }
+
+ UNIT_ASSERT_EQUAL(expected, actual);
+ }
+
+ {
+ auto program1 = factory->MakePullListProgram(
+ TProtobufInputSpec<NPureCalcProto::TStringMessage>(),
+ TProtobufOutputSpec<NPureCalcProto::TStringMessage>(),
+ sql1,
+ ETranslationMode::SQL
+ );
+
+ auto program2 = factory->MakePullListProgram(
+ TProtobufInputSpec<NPureCalcProto::TStringMessage>(),
+ TProtobufOutputSpec<NPureCalcProto::TSimpleMessage>(),
+ sql2,
+ ETranslationMode::SQL
+ );
+
+ auto input = MakeHolder<TStringMessageStreamImpl>();
+ auto intermediate = program1->Apply(std::move(input));
+ auto output = program2->Apply(std::move(intermediate));
+
+ TVector<int> expected = {2, 3, 4};
+ TVector<int> actual{};
+
+ while (auto *x = output->Fetch()) {
+ actual.push_back(x->GetX());
+ }
+
+ UNIT_ASSERT_EQUAL(expected, actual);
+ }
+
+ {
+ auto program1 = factory->MakePushStreamProgram(
+ TProtobufInputSpec<NPureCalcProto::TStringMessage>(),
+ TProtobufOutputSpec<NPureCalcProto::TStringMessage>(),
+ sql1,
+ ETranslationMode::SQL
+ );
+
+ auto program2 = factory->MakePushStreamProgram(
+ TProtobufInputSpec<NPureCalcProto::TStringMessage>(),
+ TProtobufOutputSpec<NPureCalcProto::TSimpleMessage>(),
+ sql2,
+ ETranslationMode::SQL
+ );
+
+ TVector<int> expected = {2, 3, 4, -100};
+ TVector<int> actual{};
+
+ auto consumer = MakeHolder<TSimpleMessageConsumerImpl>(&actual);
+ auto intermediate = program2->Apply(std::move(consumer));
+ auto input = program1->Apply(std::move(intermediate));
+
+ NPureCalcProto::TStringMessage Message;
+
+ Message.SetX("");
+ input->OnObject(&Message);
+
+ Message.SetX("1");
+ input->OnObject(&Message);
+
+ Message.SetX("22");
+ input->OnObject(&Message);
+
+ input->OnFinish();
+
+ UNIT_ASSERT_EQUAL(expected, actual);
+ }
+ }
+
+ // Checks that a timestamp column declared on the input spec ("MyTimestamp") can be selected
+ // by the query and is filled from the factory's deterministic time provider.
+ Y_UNIT_TEST(TestTimestampColumn) {
+ using namespace NYql::NPureCalc;
+
+ auto factory = MakeProgramFactory(TProgramFactoryOptions()
+ .SetDeterministicTimeProviderSeed(1)); // seconds
+
+ NPureCalcProto::TOptionalAllTypes canonicalMessage;
+
+ {
+ // "MyTimestamp" is not a field of TAllTypes; it is passed to the input spec as the timestamp column name.
+ auto inputSpec = TProtobufInputSpec<NPureCalcProto::TAllTypes>("MyTimestamp");
+ auto outputSpec = TProtobufOutputSpec<NPureCalcProto::TOptionalAllTypes>();
+
+ auto program = factory->MakePullStreamProgram(
+ inputSpec,
+ outputSpec,
+ "SELECT MyTimestamp AS FFixed64 FROM Input",
+ ETranslationMode::SQL
+ );
+
+ auto stream = program->Apply(MakeHolder<TAllTypesStreamImpl>());
+
+ NPureCalcProto::TOptionalAllTypes* message;
+
+ // Exactly one output message is expected.
+ UNIT_ASSERT(message = stream->Fetch());
+ // The seed above is given in seconds, the column value is expected in microseconds.
+ UNIT_ASSERT_VALUES_EQUAL(message->GetFFixed64(), 1000000); // microseconds
+ UNIT_ASSERT(!stream->Fetch());
+ }
+ }
+
+ Y_UNIT_TEST(TestTableNames) {
+ using namespace NYql::NPureCalc;
+
+ auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseSystemColumns(true));
+
+ auto runTest = [&](TStringBuf tableName, i32 value) {
+ auto program = factory->MakePullStreamProgram(
+ TProtobufInputSpec<NPureCalcProto::TSimpleMessage>(),
+ TProtobufOutputSpec<NPureCalcProto::TNamedSimpleMessage>(),
+ TString::Join("SELECT TableName() AS Name, X FROM ", tableName),
+ ETranslationMode::SQL
+ );
+
+ auto stream = program->Apply(MakeHolder<TSimpleMessageStreamImpl>(value));
+ auto message = stream->Fetch();
+
+ UNIT_ASSERT(message);
+ UNIT_ASSERT_VALUES_EQUAL(message->GetX(), value);
+ UNIT_ASSERT_VALUES_EQUAL(message->GetName(), tableName);
+ UNIT_ASSERT(!stream->Fetch());
+ };
+
+ runTest("Input", 37);
+ runTest("Input0", -23);
+ }
+
+ // Runs one Variant-producing s-expression through push-stream, pull-stream and pull-list programs
+ // and checks that every input row is routed to the expected output alternative:
+ // ABool == true -> alternative 0, ABool == false -> alternative 1, ABool unset -> alternative 2 ("Error").
+ // For the pull programs it also checks that each fetched message reports the arena passed for its
+ // alternative (or nullptr when `arenas` is not defined).
+ void CheckMultiOutputs(TMaybe<TVector<google::protobuf::Arena*>> arenas) {
+ using namespace NYql::NPureCalc;
+
+ auto factory = MakeProgramFactory();
+ TString sExpr = R"(
+(
+ (let $type (ParseType '"Variant<Struct<BInt:Int32,BString:Utf8>, Struct<CUint:Uint32,CString:Utf8>, Struct<X:Utf8>>"))
+ (let $stream (Self '0))
+ (return (FlatMap (Self '0) (lambda '(x) (block '(
+ (let $cond (Member x 'ABool))
+ (let $item0 (Variant (AsStruct '('BInt (Member x 'AInt)) '('BString (Member x 'AString))) '0 $type))
+ (let $item1 (Variant (AsStruct '('CUint (Member x 'AUint)) '('CString (Member x 'AString))) '1 $type))
+ (let $item2 (Variant (AsStruct '('X (Utf8 'Error))) '2 $type))
+ (return (If (Exists $cond) (If (Unwrap $cond) (AsList $item0) (AsList $item1)) (AsList $item2)))
+ )))))
+)
+ )";
+
+ // Push mode: feed three messages by hand and watch the per-alternative queues grow one by one.
+ {
+ auto program = factory->MakePushStreamProgram(
+ TProtobufInputSpec<NPureCalcProto::TUnsplitted>(),
+ TProtobufMultiOutputSpec<NPureCalcProto::TSplitted1, NPureCalcProto::TSplitted2, NPureCalcProto::TStringMessage>(
+ {}, arenas
+ ),
+ sExpr,
+ ETranslationMode::SExpr
+ );
+
+ TVariantConsumerImpl::TType0 queue0;
+ TVariantConsumerImpl::TType1 queue1;
+ TVariantConsumerImpl::TType2 queue2;
+ int finalValue = 0;
+
+ auto consumer = MakeHolder<TVariantConsumerImpl>(&queue0, &queue1, &queue2, &finalValue);
+ auto input = program->Apply(std::move(consumer));
+
+ NPureCalcProto::TUnsplitted message;
+ message.SetAInt(-13);
+ message.SetAUint(47);
+ message.SetAString("first message");
+ message.SetABool(true);
+
+ input->OnObject(&message);
+ UNIT_ASSERT(queue0.size() == 1 && queue1.empty() && queue2.empty() && finalValue == 0);
+
+ message.SetABool(false);
+ message.SetAString("second message");
+
+ input->OnObject(&message);
+ UNIT_ASSERT(queue0.size() == 1 && queue1.size() == 1 && queue2.empty() && finalValue == 0);
+
+ message.ClearABool();
+
+ input->OnObject(&message);
+ UNIT_ASSERT(queue0.size() == 1 && queue1.size() == 1 && queue2.size() == 1 && finalValue == 0);
+
+ // finalValue is expected to change only once the stream is finished.
+ input->OnFinish();
+ UNIT_ASSERT(queue0.size() == 1 && queue1.size() == 1 && queue2.size() == 1 && finalValue == 42);
+
+ TVariantConsumerImpl::TType0 expected0 = {{-13, "first message"}};
+ UNIT_ASSERT_EQUAL(queue0, expected0);
+
+ TVariantConsumerImpl::TType1 expected1 = {{47, "second message"}};
+ UNIT_ASSERT_EQUAL(queue1, expected1);
+
+ TVariantConsumerImpl::TType2 expected2 = {{"Error"}};
+ UNIT_ASSERT_EQUAL(queue2, expected2);
+ }
+
+ // Pull modes: stream and list programs are driven in lockstep and must agree on every fetch.
+ {
+ auto program1 = factory->MakePullStreamProgram(
+ TProtobufInputSpec<NPureCalcProto::TUnsplitted>(),
+ TProtobufMultiOutputSpec<NPureCalcProto::TSplitted1, NPureCalcProto::TSplitted2, NPureCalcProto::TStringMessage>(
+ {}, arenas
+ ),
+ sExpr,
+ ETranslationMode::SExpr
+ );
+
+ auto program2 = factory->MakePullListProgram(
+ TProtobufInputSpec<NPureCalcProto::TUnsplitted>(),
+ TProtobufMultiOutputSpec<NPureCalcProto::TSplitted1, NPureCalcProto::TSplitted2, NPureCalcProto::TStringMessage>(
+ {}, arenas
+ ),
+ sExpr,
+ ETranslationMode::SExpr
+ );
+
+ auto input1 = MakeHolder<TUnsplittedStreamImpl>();
+ auto output1 = program1->Apply(std::move(input1));
+
+ auto input2 = MakeHolder<TUnsplittedStreamImpl>();
+ auto output2 = program2->Apply(std::move(input2));
+
+ decltype(output1->Fetch()) variant1;
+ decltype(output2->Fetch()) variant2;
+
+// Asserts that both variants hold alternative I and that Get<F>() of the held message equals E.
+#define ASSERT_EQUAL_FIELDS(X1, X2, I, F, E) \
+ UNIT_ASSERT_EQUAL(X1.index(), I); \
+ UNIT_ASSERT_EQUAL(X2.index(), I); \
+ UNIT_ASSERT_EQUAL(std::get<I>(X1)->Get##F(), E); \
+ UNIT_ASSERT_EQUAL(std::get<I>(X2)->Get##F(), E)
+
+ variant1 = output1->Fetch();
+ variant2 = output2->Fetch();
+ ASSERT_EQUAL_FIELDS(variant1, variant2, 2, X, "Error");
+ ASSERT_EQUAL_FIELDS(variant1, variant2, 2, Arena, (arenas.Defined() ? arenas->at(2) : nullptr));
+
+ variant1 = output1->Fetch();
+ variant2 = output2->Fetch();
+ ASSERT_EQUAL_FIELDS(variant1, variant2, 1, CUint, 111);
+ ASSERT_EQUAL_FIELDS(variant1, variant2, 1, CString, "Hello!");
+ ASSERT_EQUAL_FIELDS(variant1, variant2, 1, Arena, (arenas.Defined() ? arenas->at(1) : nullptr));
+
+ variant1 = output1->Fetch();
+ variant2 = output2->Fetch();
+ ASSERT_EQUAL_FIELDS(variant1, variant2, 0, BInt, -23);
+ ASSERT_EQUAL_FIELDS(variant1, variant2, 0, BString, "Hello!");
+ ASSERT_EQUAL_FIELDS(variant1, variant2, 0, Arena, (arenas.Defined() ? arenas->at(0) : nullptr));
+
+ // After the last row both outputs are expected to yield alternative 0 holding nullptr.
+ variant1 = output1->Fetch();
+ variant2 = output2->Fetch();
+ UNIT_ASSERT_EQUAL(variant1.index(), 0);
+ UNIT_ASSERT_EQUAL(variant2.index(), 0);
+ UNIT_ASSERT_EQUAL(std::get<0>(variant1), nullptr);
+ // Was a copy-paste repeat of the variant1 check, leaving the pull-list output unverified.
+ UNIT_ASSERT_EQUAL(std::get<0>(variant2), nullptr);
+
+#undef ASSERT_EQUAL_FIELDS
+ }
+ }
+
+ Y_UNIT_TEST(TestMultiOutputs) {
+ CheckMultiOutputs(Nothing());
+ }
+
+ // TODO: placeholder — this test has no body and therefore always passes without checking anything.
+ Y_UNIT_TEST(TestSupportedTypes) {
+
+ }
+
+ Y_UNIT_TEST(TestProtobufArena) {
+ {
+ NPureCalcProto::TNestedRepeated input;
+ input.SetX(20);
+ {
+ auto* item = input.MutableY()->Add();
+ item->SetX(100);
+ {
+ auto* y = item->MutableY();
+ *y = GetCanonicalMessage();
+ y->SetFUint64(1000);
+ }
+ }
+ {
+ auto* item = input.MutableY()->Add();
+ item->SetX(200);
+ {
+ auto* y = item->MutableY();
+ *y = GetCanonicalMessage();
+ y->SetFUint64(2000);
+ }
+ }
+
+ google::protobuf::Arena arena;
+ CheckPassThroughYql(input, &arena);
+ }
+
+ {
+ google::protobuf::Arena arena1;
+ google::protobuf::Arena arena2;
+ TVector<google::protobuf::Arena*> arenas{&arena1, &arena2, &arena1};
+ CheckMultiOutputs(arenas);
+ }
+ }
+
+ Y_UNIT_TEST(TestFieldRenames) {
+ using namespace NYql::NPureCalc;
+
+ auto factory = MakeProgramFactory();
+
+ TString query = "SELECT InputAlias AS OutputAlias FROM Input";
+
+ auto inputProtoOptions = TProtoSchemaOptions();
+ inputProtoOptions.SetFieldRenames({{"X", "InputAlias"}});
+
+ auto inputSpec = TProtobufInputSpec<NPureCalcProto::TSimpleMessage>(
+ Nothing(), std::move(inputProtoOptions)
+ );
+
+ auto outputProtoOptions = TProtoSchemaOptions();
+ outputProtoOptions.SetFieldRenames({{"X", "OutputAlias"}});
+
+ auto outputSpec = TProtobufOutputSpec<NPureCalcProto::TSimpleMessage>(
+ std::move(outputProtoOptions)
+ );
+
+ {
+ auto program = factory->MakePullStreamProgram(
+ inputSpec, outputSpec, query, ETranslationMode::SQL
+ );
+
+ auto input = MakeHolder<TSimpleMessageStreamImpl>(1);
+ auto output = program->Apply(std::move(input));
+
+ TVector<int> expected = {1};
+ TVector<int> actual;
+
+ while (auto* x = output->Fetch()) {
+ actual.push_back(x->GetX());
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(expected, actual);
+ }
+
+ {
+ auto program = factory->MakePullListProgram(
+ inputSpec, outputSpec, query, ETranslationMode::SQL
+ );
+
+ auto input = MakeHolder<TSimpleMessageStreamImpl>(1);
+ auto output = program->Apply(std::move(input));
+
+ TVector<int> expected = {1};
+ TVector<int> actual;
+
+ while (auto* x = output->Fetch()) {
+ actual.push_back(x->GetX());
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(expected, actual);
+ }
+
+ {
+ auto program = factory->MakePushStreamProgram(
+ inputSpec, outputSpec, query, ETranslationMode::SQL
+ );
+
+ TVector<int> expected = {1, -100};
+ TVector<int> actual;
+
+ auto consumer = MakeHolder<TSimpleMessageConsumerImpl>(&actual);
+ auto input = program->Apply(std::move(consumer));
+
+ NPureCalcProto::TSimpleMessage Message;
+
+ Message.SetX(1);
+ input->OnObject(&Message);
+
+ input->OnFinish();
+
+ UNIT_ASSERT_VALUES_EQUAL(expected, actual);
+ }
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/ya.make b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/ya.make
new file mode 100644
index 0000000000..ef457d0548
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/ya.make
@@ -0,0 +1,19 @@
+UNITTEST()
+
+PEERDIR(
+ library/cpp/protobuf/util
+ ydb/library/yql/public/udf/service/exception_policy
+ ydb/library/yql/public/purecalc/common
+ ydb/library/yql/public/purecalc/io_specs/protobuf
+ ydb/library/yql/public/purecalc/ut/protos
+)
+
+SIZE(MEDIUM)
+
+YQL_LAST_ABI_VERSION()
+
+SRCS(
+ test_spec.cpp
+)
+
+END()
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/ya.make b/ydb/library/yql/public/purecalc/io_specs/protobuf/ya.make
new file mode 100644
index 0000000000..a9efad989f
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/ya.make
@@ -0,0 +1,21 @@
+LIBRARY()
+
+PEERDIR(
+ ydb/library/yql/public/purecalc/common
+ ydb/library/yql/public/purecalc/io_specs/protobuf_raw
+)
+
+SRCS(
+ spec.cpp
+ proto_variant.cpp
+)
+
+
+ YQL_LAST_ABI_VERSION()
+
+
+END()
+
+RECURSE_FOR_TESTS(
+ ut
+)
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..e482710c07
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,23 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(purecalc-io_specs-protobuf_raw)
+target_compile_options(purecalc-io_specs-protobuf_raw PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(purecalc-io_specs-protobuf_raw PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ public-purecalc-common
+ purecalc-helpers-protobuf
+)
+target_sources(purecalc-io_specs-protobuf_raw PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..eb794e6f37
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,24 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(purecalc-io_specs-protobuf_raw)
+target_compile_options(purecalc-io_specs-protobuf_raw PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(purecalc-io_specs-protobuf_raw PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ public-purecalc-common
+ purecalc-helpers-protobuf
+)
+target_sources(purecalc-io_specs-protobuf_raw PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..eb794e6f37
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,24 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(purecalc-io_specs-protobuf_raw)
+target_compile_options(purecalc-io_specs-protobuf_raw PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(purecalc-io_specs-protobuf_raw PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ public-purecalc-common
+ purecalc-helpers-protobuf
+)
+target_sources(purecalc-io_specs-protobuf_raw PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..e482710c07
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,23 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(purecalc-io_specs-protobuf_raw)
+target_compile_options(purecalc-io_specs-protobuf_raw PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(purecalc-io_specs-protobuf_raw PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ public-purecalc-common
+ purecalc-helpers-protobuf
+)
+target_sources(purecalc-io_specs-protobuf_raw PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.cpp b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.cpp
new file mode 100644
index 0000000000..95adbc4de9
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.cpp
@@ -0,0 +1 @@
+#include "proto_holder.h"
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.h b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.h
new file mode 100644
index 0000000000..7d4d843bfc
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.h
@@ -0,0 +1,31 @@
+#pragma once
+
+#include <google/protobuf/arena.h>
+
+#include <util/generic/ptr.h>
+
+#include <type_traits>
+
+namespace NYql::NPureCalc {
+ /**
+ * Deleter policy for holders of protobuf messages.
+ *
+ * Deletes the object only when it reports no arena (GetArena() == nullptr);
+ * an object that reports an arena is left untouched (presumably the arena owns it).
+ */
+ class TProtoDestroyer {
+ public:
+ template <typename T>
+ static inline void Destroy(T* t) noexcept {
+ // NOTE(review): t is dereferenced unconditionally — assumes the holder never passes nullptr here; confirm.
+ if (t->GetArena() == nullptr) {
+ CheckedDelete(t);
+ }
+ }
+ };
+
+ template <typename TProto>
+ concept IsProtoMessage = std::is_base_of_v<NProtoBuf::Message, TProto>;
+
+ template <IsProtoMessage TProto>
+ using TProtoHolder = THolder<TProto, TProtoDestroyer>;
+
+ /**
+ * Creates a TProto via Arena::CreateMessage on the given arena and wraps it into a TProtoHolder.
+ *
+ * @param arena arena pointer forwarded to Arena::CreateMessage (presumably nullptr means plain heap allocation — confirm).
+ * @param args extra arguments forwarded to Arena::CreateMessage.
+ * @return holder that uses TProtoDestroyer as its deleter.
+ */
+ template <IsProtoMessage TProto, typename... TArgs>
+ TProtoHolder<TProto> MakeProtoHolder(NProtoBuf::Arena* arena, TArgs&&... args) {
+ auto* ptr = NProtoBuf::Arena::CreateMessage<TProto>(arena, std::forward<TArgs>(args)...);
+ return TProtoHolder<TProto>(ptr);
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.cpp b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.cpp
new file mode 100644
index 0000000000..8a6f71c5b3
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.cpp
@@ -0,0 +1,1064 @@
+#include "proto_holder.h"
+#include "spec.h"
+
+#include <ydb/library/yql/public/udf/udf_value.h>
+#include <ydb/library/yql/minikql/computation/mkql_computation_node_holders.h>
+#include <ydb/library/yql/minikql/computation/mkql_custom_list.h>
+#include <ydb/library/yql/minikql/mkql_string_util.h>
+#include <ydb/library/yql/utils/yql_panic.h>
+#include <google/protobuf/reflection.h>
+
+using namespace NYql;
+using namespace NYql::NPureCalc;
+using namespace google::protobuf;
+using namespace NKikimr::NUdf;
+using namespace NKikimr::NMiniKQL;
+
+TProtobufRawInputSpec::TProtobufRawInputSpec(
+ const Descriptor& descriptor,
+ const TMaybe<TString>& timestampColumn,
+ const TProtoSchemaOptions& options
+)
+ : Descriptor_(descriptor)
+ , TimestampColumn_(timestampColumn)
+ , SchemaOptions_(options)
+{
+}
+
+// Returns the input schemas, building them on the first call and caching them in SavedSchemas_.
+// The single schema is derived from the protobuf descriptor; when a timestamp column is configured,
+// a [<name>, ["DataType", "Uint64"]] entry is appended to the list stored at index 1 of the schema node.
+const TVector<NYT::TNode>& TProtobufRawInputSpec::GetSchemas() const {
+ if (SavedSchemas_.size() == 0) {
+ SavedSchemas_.push_back(MakeSchemaFromProto(Descriptor_, SchemaOptions_));
+ if (TimestampColumn_) {
+ // Type node: ["DataType", "Uint64"].
+ auto timestampType = NYT::TNode::CreateList();
+ timestampType.Add("DataType");
+ timestampType.Add("Uint64");
+ // Member node: [<column name>, <type node>].
+ auto timestamp = NYT::TNode::CreateList();
+ timestamp.Add(*TimestampColumn_);
+ timestamp.Add(timestampType);
+ // presumably element [1] of the schema node is the struct member list — verify against MakeSchemaFromProto
+ SavedSchemas_.back().AsList()[1].AsList().push_back(timestamp);
+ }
+ }
+
+ return SavedSchemas_;
+}
+
+const Descriptor& TProtobufRawInputSpec::GetDescriptor() const {
+ return Descriptor_;
+}
+
+const TMaybe<TString>& TProtobufRawInputSpec::GetTimestampColumn() const {
+ return TimestampColumn_;
+}
+
+const TProtoSchemaOptions& TProtobufRawInputSpec::GetSchemaOptions() const {
+ return SchemaOptions_;
+}
+
+// Stores the descriptor, message factory, schema options and arena for a single-output spec.
+// Note: ListIsOptional is forced to true on the stored copy of the options, whatever the caller passed.
+TProtobufRawOutputSpec::TProtobufRawOutputSpec(
+ const Descriptor& descriptor,
+ MessageFactory* factory,
+ const TProtoSchemaOptions& options,
+ Arena* arena
+)
+ : Descriptor_(descriptor)
+ , Factory_(factory)
+ , SchemaOptions_(options)
+ , Arena_(arena)
+{
+ SchemaOptions_.ListIsOptional = true;
+}
+
+const NYT::TNode& TProtobufRawOutputSpec::GetSchema() const {
+ if (!SavedSchema_) {
+ SavedSchema_ = MakeSchemaFromProto(Descriptor_, SchemaOptions_);
+ }
+
+ return SavedSchema_.GetRef();
+}
+
+const Descriptor& TProtobufRawOutputSpec::GetDescriptor() const {
+ return Descriptor_;
+}
+
+void TProtobufRawOutputSpec::SetFactory(MessageFactory* factory) {
+ Factory_ = factory;
+}
+
+MessageFactory* TProtobufRawOutputSpec::GetFactory() const {
+ return Factory_;
+}
+
+void TProtobufRawOutputSpec::SetArena(Arena* arena) {
+ Arena_ = arena;
+}
+
+Arena* TProtobufRawOutputSpec::GetArena() const {
+ return Arena_;
+}
+
+const TProtoSchemaOptions& TProtobufRawOutputSpec::GetSchemaOptions() const {
+ return SchemaOptions_;
+}
+
+// Builds a multi-output spec with one descriptor per output.
+// `factories` and `arenas` are optional: when given, their size must equal the number of descriptors
+// (checked with Y_ENSURE); when absent, each is filled with one nullptr per descriptor.
+TProtobufRawMultiOutputSpec::TProtobufRawMultiOutputSpec(
+ TVector<const Descriptor*> descriptors,
+ TMaybe<TVector<MessageFactory*>> factories,
+ const TProtoSchemaOptions& options,
+ TMaybe<TVector<Arena*>> arenas
+)
+ : Descriptors_(std::move(descriptors))
+ , SchemaOptions_(options)
+{
+ // Descriptors_ is already initialized here, so it is safe to compare sizes against it.
+ if (factories) {
+ Y_ENSURE(factories->size() == Descriptors_.size(), "number of factories must match number of descriptors");
+ Factories_ = std::move(*factories);
+ } else {
+ Factories_ = TVector<MessageFactory*>(Descriptors_.size(), nullptr);
+ }
+
+ if (arenas) {
+ Y_ENSURE(arenas->size() == Descriptors_.size(), "number of arenas must match number of descriptors");
+ Arenas_ = std::move(*arenas);
+ } else {
+ Arenas_ = TVector<Arena*>(Descriptors_.size(), nullptr);
+ }
+}
+
+const NYT::TNode& TProtobufRawMultiOutputSpec::GetSchema() const {
+ if (SavedSchema_.IsUndefined()) {
+ SavedSchema_ = MakeVariantSchemaFromProtos(Descriptors_, SchemaOptions_);
+ }
+
+ return SavedSchema_;
+}
+
+const Descriptor& TProtobufRawMultiOutputSpec::GetDescriptor(ui32 index) const {
+ Y_ENSURE(index < Descriptors_.size(), "invalid output index");
+
+ return *Descriptors_[index];
+}
+
+void TProtobufRawMultiOutputSpec::SetFactory(ui32 index, MessageFactory* factory) {
+ Y_ENSURE(index < Factories_.size(), "invalid output index");
+
+ Factories_[index] = factory;
+}
+
+MessageFactory* TProtobufRawMultiOutputSpec::GetFactory(ui32 index) const {
+ Y_ENSURE(index < Factories_.size(), "invalid output index");
+
+ return Factories_[index];
+}
+
+void TProtobufRawMultiOutputSpec::SetArena(ui32 index, Arena* arena) {
+ Y_ENSURE(index < Arenas_.size(), "invalid output index");
+
+ Arenas_[index] = arena;
+}
+
+Arena* TProtobufRawMultiOutputSpec::GetArena(ui32 index) const {
+ Y_ENSURE(index < Arenas_.size(), "invalid output index");
+
+ return Arenas_[index];
+}
+
+ui32 TProtobufRawMultiOutputSpec::GetOutputsNumber() const {
+ return static_cast<ui32>(Descriptors_.size());
+}
+
+const TProtoSchemaOptions& TProtobufRawMultiOutputSpec::GetSchemaOptions() const {
+ return SchemaOptions_;
+}
+
+namespace {
+ struct TFieldMapping {
+ TString Name;
+ const FieldDescriptor* Field;
+ TVector<TFieldMapping> NestedFields;
+ };
+
+ /**
+ * Fills a tree of field mappings from the given yql struct type to protobuf message.
+ *
+ * @param fromType source yql type.
+ * @param toType target protobuf message type.
+ * @param mappings destination vector will be filled with field descriptors. Order of descriptors will match
+ * the order of field names.
+ * @param timestampColumn name of the only struct member that is allowed to have no protobuf field
+ * (its mapping keeps Field == nullptr). Not propagated to nested structs.
+ * @param listIsOptional when set, Optional<List<T>> members are unwrapped down to T, same as List<T>.
+ * @param fieldRenames protobuf field name -> yql member name. Applied to this level only,
+ * nested structs are mapped without renames.
+ *
+ * Throws (via Y_ENSURE / YQL_ENSURE) on duplicate rename targets, on members without a matching
+ * protobuf field, on unsupported member kinds and on struct members whose protobuf field is not a message.
+ */
+ void FillFieldMappings(
+ const TStructType* fromType,
+ const Descriptor& toType,
+ TVector<TFieldMapping>& mappings,
+ const TMaybe<TString>& timestampColumn,
+ bool listIsOptional,
+ const THashMap<TString, TString>& fieldRenames
+ ) {
+ // Invert the rename map so that a yql member name can be translated back to its protobuf field name.
+ THashMap<TString, TString> inverseFieldRenames;
+
+ for (const auto& [source, target]: fieldRenames) {
+ auto [iterator, emplaced] = inverseFieldRenames.emplace(target, source);
+ Y_ENSURE(emplaced, "Duplicate rename field found: " << source << " -> " << target);
+ }
+
+ const ui32 membersCount = fromType->GetMembersCount();
+ mappings.resize(membersCount);
+ for (ui32 i = 0; i < membersCount; ++i) {
+ TString fieldName(fromType->GetMemberName(i));
+ if (const auto* fieldRenamePtr = inverseFieldRenames.FindPtr(fieldName)) {
+ fieldName = *fieldRenamePtr;
+ }
+
+ mappings[i].Name = fieldName;
+ mappings[i].Field = toType.FindFieldByName(fieldName);
+ YQL_ENSURE(
+ mappings[i].Field || (timestampColumn && *timestampColumn == fieldName),
+ "Missing field: " << fieldName);
+
+ // Strip one List / Optional (and, if allowed, Optional<List>) wrapper to get to the item type.
+ const auto* fieldType = fromType->GetMemberType(i);
+ if (fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::List) {
+ const auto* listType = static_cast<const NKikimr::NMiniKQL::TListType*>(fieldType);
+ fieldType = listType->GetItemType();
+ } else if (fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::Optional) {
+ const auto* optionalType = static_cast<const NKikimr::NMiniKQL::TOptionalType*>(fieldType);
+ fieldType = optionalType->GetItemType();
+
+ if (listIsOptional) {
+ if (fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::List) {
+ const auto* listType = static_cast<const NKikimr::NMiniKQL::TListType*>(fieldType);
+ fieldType = listType->GetItemType();
+ }
+ }
+ }
+ YQL_ENSURE(fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::Struct ||
+ fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::Data,
+ "unsupported field kind [" << fieldType->GetKindAsStr() << "], field [" << fieldName << "]");
+ if (fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::Struct) {
+ // Field may be null (timestamp column) and message_type() is null for non-message fields;
+ // fail with a readable error instead of dereferencing a null descriptor.
+ const Descriptor* nestedDescriptor = mappings[i].Field ? mappings[i].Field->message_type() : nullptr;
+ YQL_ENSURE(nestedDescriptor,
+ "struct field [" << fieldName << "] does not correspond to a protobuf message field");
+ FillFieldMappings(static_cast<const NKikimr::NMiniKQL::TStructType*>(fieldType),
+ *nestedDescriptor,
+ mappings[i].NestedFields, Nothing(), listIsOptional, {});
+ }
+ }
+ }
+
+ /**
+ * Extract field values from the given protobuf message into an array of unboxed values.
+ *
+ * @param factory to create nested unboxed values.
+ * @param source source protobuf message.
+ * @param destination destination array of unboxed values. Each element in the array corresponds to a field
+ * in the protobuf message.
+ * @param mappings vector of protobuf field descriptors which denotes relation between fields of the
+ * source message and elements of the destination array.
+ * @param scratch temporary string which will be used during conversion.
+ */
+ void FillInputValue(
+ const THolderFactory& factory,
+ const Message* source,
+ TUnboxedValue* destination,
+ const TVector<TFieldMapping>& mappings,
+ const TMaybe<TString>& timestampColumn,
+ ITimeProvider* timeProvider,
+ EEnumPolicy enumPolicy
+ ) {
+ TString scratch;
+ auto reflection = source->GetReflection();
+ for (ui32 i = 0; i < mappings.size(); ++i) {
+ auto mapping = mappings[i];
+ if (!mapping.Field) {
+ YQL_ENSURE(timestampColumn && mapping.Name == *timestampColumn);
+ destination[i] = TUnboxedValuePod(timeProvider->Now().MicroSeconds());
+ continue;
+ }
+
+ const auto type = mapping.Field->type();
+ if (mapping.Field->label() == FieldDescriptor::LABEL_REPEATED) {
+ const auto size = static_cast<ui32>(reflection->FieldSize(*source, mapping.Field));
+ if (size == 0) {
+ destination[i] = factory.GetEmptyContainer();
+ } else {
+ TUnboxedValue* inplace = nullptr;
+ destination[i] = factory.CreateDirectArrayHolder(size, inplace);
+ for (ui32 j = 0; j < size; ++j) {
+ switch (type) {
+ case FieldDescriptor::TYPE_DOUBLE:
+ inplace[j] = TUnboxedValuePod(reflection->GetRepeatedDouble(*source, mapping.Field, j));
+ break;
+
+ case FieldDescriptor::TYPE_FLOAT:
+ inplace[j] = TUnboxedValuePod(reflection->GetRepeatedFloat(*source, mapping.Field, j));
+ break;
+
+ case FieldDescriptor::TYPE_INT64:
+ case FieldDescriptor::TYPE_SFIXED64:
+ case FieldDescriptor::TYPE_SINT64:
+ inplace[j] = TUnboxedValuePod(reflection->GetRepeatedInt64(*source, mapping.Field, j));
+ break;
+
+ case FieldDescriptor::TYPE_ENUM:
+ switch (EnumFormatType(*mapping.Field, enumPolicy)) {
+ case EEnumFormatType::Int32:
+ inplace[j] = TUnboxedValuePod(reflection->GetRepeatedEnumValue(*source, mapping.Field, j));
+ break;
+ case EEnumFormatType::String:
+ inplace[j] = MakeString(reflection->GetRepeatedEnum(*source, mapping.Field, j)->name());
+ break;
+ }
+ break;
+
+ case FieldDescriptor::TYPE_UINT64:
+ case FieldDescriptor::TYPE_FIXED64:
+ inplace[j] = TUnboxedValuePod(reflection->GetRepeatedUInt64(*source, mapping.Field, j));
+ break;
+
+ case FieldDescriptor::TYPE_INT32:
+ case FieldDescriptor::TYPE_SFIXED32:
+ case FieldDescriptor::TYPE_SINT32:
+ inplace[j] = TUnboxedValuePod(reflection->GetRepeatedInt32(*source, mapping.Field, j));
+ break;
+
+ case FieldDescriptor::TYPE_UINT32:
+ case FieldDescriptor::TYPE_FIXED32:
+ inplace[j] = TUnboxedValuePod(reflection->GetRepeatedUInt32(*source, mapping.Field, j));
+ break;
+
+ case FieldDescriptor::TYPE_BOOL:
+ inplace[j] = TUnboxedValuePod(reflection->GetRepeatedBool(*source, mapping.Field, j));
+ break;
+
+ case FieldDescriptor::TYPE_STRING:
+ inplace[j] = MakeString(reflection->GetRepeatedStringReference(*source, mapping.Field, j, &scratch));
+ break;
+
+ case FieldDescriptor::TYPE_BYTES:
+ inplace[j] = MakeString(reflection->GetRepeatedStringReference(*source, mapping.Field, j, &scratch));
+ break;
+
+ case FieldDescriptor::TYPE_MESSAGE:
+ {
+ const Message& nestedMessage = reflection->GetRepeatedMessage(*source, mapping.Field, j);
+ TUnboxedValue* nestedValues = nullptr;
+ inplace[j] = factory.CreateDirectArrayHolder(static_cast<ui32>(mapping.NestedFields.size()),
+ nestedValues);
+ FillInputValue(factory, &nestedMessage, nestedValues, mapping.NestedFields, Nothing(), timeProvider, enumPolicy);
+ }
+ break;
+
+ default:
+ ythrow yexception() << "Unsupported protobuf type: " << mapping.Field->type_name() << ", field: " << mapping.Field->name();
+ }
+ }
+ }
+ } else {
+ if (!reflection->HasField(*source, mapping.Field)) {
+ continue;
+ }
+
+ switch (type) {
+ case FieldDescriptor::TYPE_DOUBLE:
+ destination[i] = TUnboxedValuePod(reflection->GetDouble(*source, mapping.Field));
+ break;
+
+ case FieldDescriptor::TYPE_FLOAT:
+ destination[i] = TUnboxedValuePod(reflection->GetFloat(*source, mapping.Field));
+ break;
+
+ case FieldDescriptor::TYPE_INT64:
+ case FieldDescriptor::TYPE_SFIXED64:
+ case FieldDescriptor::TYPE_SINT64:
+ destination[i] = TUnboxedValuePod(reflection->GetInt64(*source, mapping.Field));
+ break;
+
+ case FieldDescriptor::TYPE_ENUM:
+ switch (EnumFormatType(*mapping.Field, enumPolicy)) {
+ case EEnumFormatType::Int32:
+ destination[i] = TUnboxedValuePod(reflection->GetEnumValue(*source, mapping.Field));
+ break;
+ case EEnumFormatType::String:
+ destination[i] = MakeString(reflection->GetEnum(*source, mapping.Field)->name());
+ break;
+ }
+ break;
+
+ case FieldDescriptor::TYPE_UINT64:
+ case FieldDescriptor::TYPE_FIXED64:
+ destination[i] = TUnboxedValuePod(reflection->GetUInt64(*source, mapping.Field));
+ break;
+
+ case FieldDescriptor::TYPE_INT32:
+ case FieldDescriptor::TYPE_SFIXED32:
+ case FieldDescriptor::TYPE_SINT32:
+ destination[i] = TUnboxedValuePod(reflection->GetInt32(*source, mapping.Field));
+ break;
+
+ case FieldDescriptor::TYPE_UINT32:
+ case FieldDescriptor::TYPE_FIXED32:
+ destination[i] = TUnboxedValuePod(reflection->GetUInt32(*source, mapping.Field));
+ break;
+
+ case FieldDescriptor::TYPE_BOOL:
+ destination[i] = TUnboxedValuePod(reflection->GetBool(*source, mapping.Field));
+ break;
+
+ case FieldDescriptor::TYPE_STRING:
+ destination[i] = MakeString(reflection->GetStringReference(*source, mapping.Field, &scratch));
+ break;
+
+ case FieldDescriptor::TYPE_BYTES:
+ destination[i] = MakeString(reflection->GetStringReference(*source, mapping.Field, &scratch));
+ break;
+ case FieldDescriptor::TYPE_MESSAGE:
+ {
+ const Message& nestedMessage = reflection->GetMessage(*source, mapping.Field);
+ TUnboxedValue* nestedValues = nullptr;
+ destination[i] = factory.CreateDirectArrayHolder(static_cast<ui32>(mapping.NestedFields.size()),
+ nestedValues);
+ FillInputValue(factory, &nestedMessage, nestedValues, mapping.NestedFields, Nothing(), timeProvider, enumPolicy);
+ }
+ break;
+
+ default:
+ ythrow yexception() << "Unsupported protobuf type: " << mapping.Field->type_name()
+ << ", field: " << mapping.Field->name();
+ }
+ }
+ }
+ }
+
+
+ /**
+ * Convert unboxed value to protobuf.
+ *
+ * Walks the mappings in order: the i-th struct element is written to the field of the i-th mapping.
+ * An empty element clears the field; a repeated field is cleared and then refilled item by item from
+ * the element's list iterator; nested messages are filled by a recursive call with mapping.NestedFields.
+ *
+ * @param source unboxed value to extract data from. Type of the value should be struct. It's UB to pass
+ * a non-struct value here.
+ * @param destination destination message. Data in this message will be overwritten
+ * by data from unboxed value.
+ * @param mappings vector of protobuf field descriptors which denotes relation between struct fields
+ * and message fields. For any i-th element of this vector, type of the i-th element of
+ * the unboxed structure must match type of the field pointed by descriptor. Size of this
+ * vector should match the number of fields in the struct.
+ * @param enumPolicy forwarded to EnumFormatType() to choose how enum fields are read: as an i32 number
+ * or as a string with the enum value name. A name that is not found in the enum type
+ * is replaced by the field's default enum value.
+ * @throws yexception if a mapped field has a protobuf type that is not handled by the switches below.
+ */
+ void FillOutputMessage(
+ const TUnboxedValue& source,
+ Message* destination,
+ const TVector<TFieldMapping>& mappings,
+ EEnumPolicy enumPolicy
+ ) {
+ auto reflection = destination->GetReflection();
+ for (ui32 i = 0; i < mappings.size(); ++i) {
+ const auto& mapping = mappings[i];
+ const auto& cell = source.GetElement(i);
+ if (!cell) { // empty element: reset the field instead of writing a value
+ reflection->ClearField(destination, mapping.Field);
+ continue;
+ }
+ const auto type = mapping.Field->type();
+ if (mapping.Field->label() == FieldDescriptor::LABEL_REPEATED) {
+ const auto iter = cell.GetListIterator();
+ reflection->ClearField(destination, mapping.Field); // drop old items; the loop below only appends
+ for (TUnboxedValue item; iter.Next(item);) {
+ switch (mapping.Field->type()) { // same value as `type` computed above
+ case FieldDescriptor::TYPE_DOUBLE:
+ reflection->AddDouble(destination, mapping.Field, item.Get<double>());
+ break;
+
+ case FieldDescriptor::TYPE_FLOAT:
+ reflection->AddFloat(destination, mapping.Field, item.Get<float>());
+ break;
+
+ case FieldDescriptor::TYPE_INT64:
+ case FieldDescriptor::TYPE_SFIXED64:
+ case FieldDescriptor::TYPE_SINT64:
+ reflection->AddInt64(destination, mapping.Field, item.Get<i64>());
+ break;
+
+ case FieldDescriptor::TYPE_ENUM: {
+ switch (EnumFormatType(*mapping.Field, enumPolicy)) { // numeric or by-name representation
+ case EEnumFormatType::Int32:
+ reflection->AddEnumValue(destination, mapping.Field, item.Get<i32>());
+ break;
+ case EEnumFormatType::String: {
+ auto enumValueDescriptor = mapping.Field->enum_type()->FindValueByName(TString(item.AsStringRef()));
+ if (!enumValueDescriptor) { // unknown name: fall back to the field's default enum value
+ enumValueDescriptor = mapping.Field->default_value_enum();
+ }
+ reflection->AddEnum(destination, mapping.Field, enumValueDescriptor);
+ break;
+ }
+ }
+ break;
+ }
+
+ case FieldDescriptor::TYPE_UINT64:
+ case FieldDescriptor::TYPE_FIXED64:
+ reflection->AddUInt64(destination, mapping.Field, item.Get<ui64>());
+ break;
+
+ case FieldDescriptor::TYPE_INT32:
+ case FieldDescriptor::TYPE_SFIXED32:
+ case FieldDescriptor::TYPE_SINT32:
+ reflection->AddInt32(destination, mapping.Field, item.Get<i32>());
+ break;
+
+ case FieldDescriptor::TYPE_UINT32:
+ case FieldDescriptor::TYPE_FIXED32:
+ reflection->AddUInt32(destination, mapping.Field, item.Get<ui32>());
+ break;
+
+ case FieldDescriptor::TYPE_BOOL:
+ reflection->AddBool(destination, mapping.Field, item.Get<bool>());
+ break;
+
+ case FieldDescriptor::TYPE_STRING:
+ reflection->AddString(destination, mapping.Field, TString(item.AsStringRef()));
+ break;
+
+ case FieldDescriptor::TYPE_BYTES: // handled exactly like TYPE_STRING
+ reflection->AddString(destination, mapping.Field, TString(item.AsStringRef()));
+ break;
+
+ case FieldDescriptor::TYPE_MESSAGE:
+ {
+ auto* nestedMessage = reflection->AddMessage(destination, mapping.Field); // appends a new element
+ FillOutputMessage(item, nestedMessage, mapping.NestedFields, enumPolicy);
+ }
+ break;
+
+ default:
+ ythrow yexception() << "Unsupported protobuf type: "
+ << mapping.Field->type_name() << ", field: " << mapping.Field->name();
+ }
+ }
+ } else {
+ switch (type) { // singular field: same type dispatch as above, using Set* instead of Add*
+ case FieldDescriptor::TYPE_DOUBLE:
+ reflection->SetDouble(destination, mapping.Field, cell.Get<double>());
+ break;
+
+ case FieldDescriptor::TYPE_FLOAT:
+ reflection->SetFloat(destination, mapping.Field, cell.Get<float>());
+ break;
+
+ case FieldDescriptor::TYPE_INT64:
+ case FieldDescriptor::TYPE_SFIXED64:
+ case FieldDescriptor::TYPE_SINT64:
+ reflection->SetInt64(destination, mapping.Field, cell.Get<i64>());
+ break;
+
+ case FieldDescriptor::TYPE_ENUM: {
+ switch (EnumFormatType(*mapping.Field, enumPolicy)) { // numeric or by-name representation
+ case EEnumFormatType::Int32:
+ reflection->SetEnumValue(destination, mapping.Field, cell.Get<i32>());
+ break;
+ case EEnumFormatType::String: {
+ auto enumValueDescriptor = mapping.Field->enum_type()->FindValueByName(TString(cell.AsStringRef()));
+ if (!enumValueDescriptor) { // unknown name: fall back to the field's default enum value
+ enumValueDescriptor = mapping.Field->default_value_enum();
+ }
+ reflection->SetEnum(destination, mapping.Field, enumValueDescriptor);
+ break;
+ }
+ }
+ break;
+ }
+
+ case FieldDescriptor::TYPE_UINT64:
+ case FieldDescriptor::TYPE_FIXED64:
+ reflection->SetUInt64(destination, mapping.Field, cell.Get<ui64>());
+ break;
+
+ case FieldDescriptor::TYPE_INT32:
+ case FieldDescriptor::TYPE_SFIXED32:
+ case FieldDescriptor::TYPE_SINT32:
+ reflection->SetInt32(destination, mapping.Field, cell.Get<i32>());
+ break;
+
+ case FieldDescriptor::TYPE_UINT32:
+ case FieldDescriptor::TYPE_FIXED32:
+ reflection->SetUInt32(destination, mapping.Field, cell.Get<ui32>());
+ break;
+
+ case FieldDescriptor::TYPE_BOOL:
+ reflection->SetBool(destination, mapping.Field, cell.Get<bool>());
+ break;
+
+ case FieldDescriptor::TYPE_STRING:
+ reflection->SetString(destination, mapping.Field, TString(cell.AsStringRef()));
+ break;
+
+ case FieldDescriptor::TYPE_BYTES: // handled exactly like TYPE_STRING
+ reflection->SetString(destination, mapping.Field, TString(cell.AsStringRef()));
+ break;
+
+ case FieldDescriptor::TYPE_MESSAGE:
+ {
+ auto* nestedMessage = reflection->MutableMessage(destination, mapping.Field); // filled in place
+ FillOutputMessage(cell, nestedMessage, mapping.NestedFields, enumPolicy);
+ }
+ break;
+
+ default:
+ ythrow yexception() << "Unsupported protobuf type: "
+ << mapping.Field->type_name() << ", field: " << mapping.Field->name();
+ }
+ }
+ }
+ }
+
+ /**
+ * Turns an input protobuf message into an unboxed struct value for the worker.
+ *
+ * The field mappings are computed once, in the constructor, from the worker's input type and the
+ * descriptor of the input spec; every DoConvert() call reuses them.
+ */
+ class TInputConverter {
+ protected:
+     IWorker* Worker_;
+     TVector<TFieldMapping> Mappings_;
+     TPlainContainerCache Cache_;
+     TMaybe<TString> TimestampColumn_;
+     EEnumPolicy EnumPolicy_ = EEnumPolicy::Int32;
+
+ public:
+     explicit TInputConverter(const TProtobufRawInputSpec& inputSpec, IWorker* worker)
+         : Worker_(worker)
+         , TimestampColumn_(inputSpec.GetTimestampColumn())
+         , EnumPolicy_(inputSpec.GetSchemaOptions().EnumPolicy)
+     {
+         const auto& schemaOptions = inputSpec.GetSchemaOptions();
+         FillFieldMappings(
+             Worker_->GetInputType(),
+             inputSpec.GetDescriptor(),
+             Mappings_,
+             TimestampColumn_,
+             schemaOptions.ListIsOptional,
+             schemaOptions.FieldRenames
+         );
+     }
+
+ public:
+     /**
+      * Allocates an array with one slot per mapping (through the container cache), stores it in
+      * `result` and fills the slots from `message`.
+      */
+     void DoConvert(const Message* message, TUnboxedValue& result) {
+         auto& factory = Worker_->GetGraph().GetHolderFactory();
+         const auto slotCount = static_cast<ui32>(Mappings_.size());
+
+         TUnboxedValue* slots = nullptr;
+         result = Cache_.NewArray(factory, slotCount, slots);
+
+         FillInputValue(factory, message, slots, Mappings_, TimestampColumn_, Worker_->GetTimeProvider(), EnumPolicy_);
+     }
+
+     /**
+      * Clears the container cache used by DoConvert().
+      */
+     void ClearCache() {
+         Cache_.Clear();
+     }
+ };
+
+ template <typename TOutputSpec>
+ using OutputItemType = typename TOutputSpecTraits<TOutputSpec>::TOutputItemType; // shorthand for the item type an output spec yields
+
+ template <typename TOutputSpec>
+ class TOutputConverter; // primary template is only declared; the specializations below provide the implementations
+
+ /**
+ * Converts unboxed values to output messages (single-output program case).
+ *
+ * A single message object is created in the constructor and reused: every DoConvert() call overwrites
+ * it and returns a pointer to that same object.
+ */
+ template <>
+ class TOutputConverter<TProtobufRawOutputSpec> {
+ protected:
+ IWorker* Worker_;
+ TVector<TFieldMapping> OutputColumns_; // filled once by FillFieldMappings() in the constructor
+ TProtoHolder<Message> Message_; // the reusable output message
+ EEnumPolicy EnumPolicy_ = EEnumPolicy::Int32;
+
+ public:
+ explicit TOutputConverter(const TProtobufRawOutputSpec& outputSpec, IWorker* worker)
+ : Worker_(worker)
+ , EnumPolicy_(outputSpec.GetSchemaOptions().EnumPolicy)
+ {
+ if (!Worker_->GetOutputType()->IsStruct()) { // checked before the static_cast to TStructType below
+ ythrow yexception() << "protobuf output spec does not support multiple outputs";
+ }
+
+ FillFieldMappings(
+ static_cast<const NKikimr::NMiniKQL::TStructType*>(Worker_->GetOutputType()),
+ outputSpec.GetDescriptor(),
+ OutputColumns_,
+ Nothing(),
+ outputSpec.GetSchemaOptions().ListIsOptional,
+ outputSpec.GetSchemaOptions().FieldRenames
+ );
+
+ auto* factory = outputSpec.GetFactory();
+
+ if (!factory) { // no factory configured in the spec: use protobuf's generated factory
+ factory = MessageFactory::generated_factory();
+ }
+
+ Message_.Reset(factory->GetPrototype(&outputSpec.GetDescriptor())->New(outputSpec.GetArena()));
+ }
+
+ OutputItemType<TProtobufRawOutputSpec> DoConvert(TUnboxedValue value) { // returned pointer refers to Message_, overwritten by the next call
+ FillOutputMessage(value, Message_.Get(), OutputColumns_, EnumPolicy_);
+ return Message_.Get();
+ }
+ };
+
+ /**
+ * Converts unboxed values to output type (multi-output programs case).
+ *
+ * The program output must be a variant over a tuple with one alternative per output of the spec.
+ * For every alternative the constructor prepares field mappings and one reusable message; DoConvert()
+ * picks the pair by the variant index of the incoming value, overwrites that message and returns
+ * the index together with a pointer to it.
+ */
+ template <>
+ class TOutputConverter<TProtobufRawMultiOutputSpec> {
+ protected:
+     IWorker* Worker_;
+     TVector<TVector<TFieldMapping>> OutputColumns_; // OutputColumns_[i] are the mappings of the i-th output
+     TVector<TProtoHolder<Message>> Messages_;       // Messages_[i] is the reusable message of the i-th output
+     EEnumPolicy EnumPolicy_ = EEnumPolicy::Int32;
+
+ public:
+     /**
+      * @throws yexception (via Y_ENSURE) if the worker's output type is not a variant over a tuple, if the
+      *         number of alternatives differs from the number of outputs in the spec, or if some
+      *         alternative is not a struct.
+      */
+     explicit TOutputConverter(const TProtobufRawMultiOutputSpec& outputSpec, IWorker* worker)
+         : Worker_(worker)
+         , EnumPolicy_(outputSpec.GetSchemaOptions().EnumPolicy)
+     {
+         const auto* outputType = Worker_->GetOutputType();
+         Y_ENSURE(outputType->IsVariant(), "protobuf multi-output spec requires multi-output program");
+         const auto* variantType = static_cast<const NKikimr::NMiniKQL::TVariantType*>(outputType);
+         Y_ENSURE(
+             variantType->GetUnderlyingType()->IsTuple(),
+             "protobuf multi-output spec requires variant over tuple as program output type"
+         );
+         Y_ENSURE(
+             outputSpec.GetOutputsNumber() == variantType->GetAlternativesCount(),
+             "number of outputs provided by spec does not match number of variant alternatives"
+         );
+
+         auto defaultFactory = MessageFactory::generated_factory();
+
+         for (ui32 i = 0; i < variantType->GetAlternativesCount(); ++i) {
+             const auto* type = variantType->GetAlternativeType(i);
+             // This check guards the static_cast to TStructType below, so it must also hold in release
+             // builds: a debug-only assertion would let a non-struct alternative reach the cast.
+             Y_ENSURE(
+                 type->IsStruct(),
+                 "protobuf multi-output spec requires every variant alternative to be a struct"
+             );
+             // Internal invariant: both vectors grow by exactly one element per iteration.
+             Y_ASSERT(OutputColumns_.size() == i && Messages_.size() == i);
+
+             OutputColumns_.push_back({});
+
+             // NOTE(review): unlike the single-output converter, no field renames are passed here
+             // (empty map instead of GetSchemaOptions().FieldRenames) - confirm this is intentional.
+             FillFieldMappings(
+                 static_cast<const NKikimr::NMiniKQL::TStructType*>(type),
+                 outputSpec.GetDescriptor(i),
+                 OutputColumns_.back(),
+                 Nothing(),
+                 outputSpec.GetSchemaOptions().ListIsOptional,
+                 {}
+             );
+
+             auto factory = outputSpec.GetFactory(i);
+             if (!factory) {
+                 // No factory configured for this output: use protobuf's generated factory.
+                 factory = defaultFactory;
+             }
+
+             Messages_.push_back(TProtoHolder<Message>(
+                 factory->GetPrototype(&outputSpec.GetDescriptor(i))->New(outputSpec.GetArena(i))
+             ));
+         }
+     }
+
+     /**
+      * Fills the message of the alternative selected by the value's variant index and returns
+      * {index, message}. The message is reused, so the pointer's content changes on the next call
+      * for the same index.
+      */
+     OutputItemType<TProtobufRawMultiOutputSpec> DoConvert(TUnboxedValue value) {
+         auto index = value.GetVariantIndex();
+         auto msgPtr = Messages_[index].Get();
+         FillOutputMessage(value.GetVariantItem(), msgPtr, OutputColumns_[index], EnumPolicy_);
+         return {index, msgPtr};
+     }
+ };
+
+ /**
+ * List (or, better, stream) of unboxed values. Used as an input value in pull workers.
+ *
+ * The object is its own iterator: GetListIterator() returns this very object and may be called only once,
+ * so the input can be traversed a single time. Each Next() fetches one message from the underlying stream
+ * and converts it to an unboxed value.
+ */
+ class TProtoListValue final: public TCustomListValue {
+ private:
+ mutable bool HasIterator_ = false; // set by GetListIterator(); mutable because that method is const
+ THolder<IStream<Message*>> Underlying_;
+ TInputConverter Converter_;
+ IWorker* Worker_;
+ TScopedAlloc& ScopedAlloc_; // own reference to the worker's scoped alloc, used by Next() and the destructor
+
+ public:
+ TProtoListValue(
+ TMemoryUsageInfo* memInfo,
+ const TProtobufRawInputSpec& inputSpec,
+ THolder<IStream<Message*>> underlying,
+ IWorker* worker
+ )
+ : TCustomListValue(memInfo)
+ , Underlying_(std::move(underlying))
+ , Converter_(inputSpec, worker)
+ , Worker_(worker)
+ , ScopedAlloc_(Worker_->GetScopedAlloc())
+ {
+ }
+
+ ~TProtoListValue() override {
+ {
+ // This list value is stored in the worker's computation graph and is destroyed upon the computation
+ // graph's destruction. This brings us to an interesting situation: the scoped alloc is acquired, the
+ // worker and computation graph are half-way destroyed, and now it's our turn to die. The problem is,
+ // the underlying stream may own another worker. This happens when chaining programs. Now, to destroy
+ // that worker correctly, we need to release our scoped alloc (because that worker has its own
+ // computation graph and scoped alloc).
+ // By the way, note that we shouldn't interact with the worker here because the worker is in the middle
+ // of its own destruction. So we're using our own reference to the scoped alloc. That reference is alive
+ // because the scoped alloc is destroyed after the computation graph.
+ auto unguard = Unguard(ScopedAlloc_);
+ Underlying_.Destroy();
+ }
+ }
+
+ public:
+ TUnboxedValue GetListIterator() const override {
+ YQL_ENSURE(!HasIterator_, "Only one pass over input is supported");
+ HasIterator_ = true;
+ return TUnboxedValuePod(const_cast<TProtoListValue*>(this)); // the list acts as its own iterator
+ }
+
+ bool Next(TUnboxedValue& result) override {
+ const Message* message;
+ {
+ auto unguard = Unguard(ScopedAlloc_); // scoped alloc is released only while the underlying stream is fetched
+ message = Underlying_->Fetch();
+ }
+
+ if (!message) { // a null message marks the end of the underlying stream
+ return false;
+ }
+
+ Converter_.DoConvert(message, result);
+
+ return true;
+ }
+
+ EFetchStatus Fetch(TUnboxedValue& result) override { // stream-style access on top of Next(); never reports Yield
+ if (Next(result)) {
+ return EFetchStatus::Ok;
+ } else {
+ return EFetchStatus::Finish;
+ }
+ }
+ };
+
+ /**
+ * Consumer which converts messages to unboxed values and relays them to the worker. Used as a return value
+ * of the push processor's Process function.
+ *
+ * Every interaction with the worker (pushing a value, finishing, clearing the converter cache) is done
+ * inside with_lock on the worker's scoped alloc.
+ */
+ class TProtoConsumerImpl final: public IConsumer<Message*> {
+ private:
+ TWorkerHolder<IPushStreamWorker> WorkerHolder_; // declared first: Converter_ is constructed from WorkerHolder_.Get()
+ TInputConverter Converter_;
+
+ public:
+ explicit TProtoConsumerImpl(
+ const TProtobufRawInputSpec& inputSpec,
+ TWorkerHolder<IPushStreamWorker> worker
+ )
+ : WorkerHolder_(std::move(worker))
+ , Converter_(inputSpec, WorkerHolder_.Get())
+ {
+ }
+
+ ~TProtoConsumerImpl() override {
+ with_lock(WorkerHolder_->GetScopedAlloc()) { // the converter cache is cleared while the scoped alloc is held
+ Converter_.ClearCache();
+ }
+ }
+
+ public:
+ void OnObject(Message* message) override { // converts one message and pushes the resulting value to the worker
+ TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator());
+
+ with_lock(WorkerHolder_->GetScopedAlloc()) {
+ TUnboxedValue result;
+ Converter_.DoConvert(message, result);
+ WorkerHolder_->Push(std::move(result));
+ }
+ }
+
+ void OnFinish() override { // forwards the end-of-input notification to the worker
+ TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator());
+
+ with_lock(WorkerHolder_->GetScopedAlloc()) {
+ WorkerHolder_->OnFinish();
+ }
+ }
+ };
+
+ /**
+ * Pull-stream adapter: exposes the output of an IPullStreamWorker as a stream of converted output items
+ * (whatever OutputItemType<TOutputSpec> is for the given output spec).
+ */
+ template <typename TOutputSpec>
+ class TRawProtoStreamImpl final: public IStream<OutputItemType<TOutputSpec>> {
+ protected:
+     TWorkerHolder<IPullStreamWorker> WorkerHolder_;
+     TOutputConverter<TOutputSpec> Converter_;
+
+ public:
+     explicit TRawProtoStreamImpl(const TOutputSpec& outputSpec, TWorkerHolder<IPullStreamWorker> worker)
+         : WorkerHolder_(std::move(worker))
+         , Converter_(outputSpec, WorkerHolder_.Get())
+     {
+     }
+
+ public:
+     /**
+      * Fetches the next value from the worker's output under the worker's scoped alloc and converts it.
+      * Returns the spec's StreamSentinel once the worker reports Finish; a Yield status is rejected.
+      */
+     OutputItemType<TOutputSpec> Fetch() override {
+         TBindTerminator terminatorGuard(WorkerHolder_->GetGraph().GetTerminator());
+
+         with_lock(WorkerHolder_->GetScopedAlloc()) {
+             TUnboxedValue item;
+             const auto fetchStatus = WorkerHolder_->GetOutput().Fetch(item);
+
+             YQL_ENSURE(fetchStatus != EFetchStatus::Yield, "Yield is not supported in pull mode");
+
+             if (fetchStatus != EFetchStatus::Finish) {
+                 return Converter_.DoConvert(item);
+             }
+
+             return TOutputSpecTraits<TOutputSpec>::StreamSentinel;
+         }
+     }
+ };
+
+ /**
+ * Pull-list adapter: exposes the output iterator of an IPullListWorker as a stream of converted output
+ * items (whatever OutputItemType<TOutputSpec> is for the given output spec).
+ */
+ template <typename TOutputSpec>
+ class TRawProtoListImpl final: public IStream<OutputItemType<TOutputSpec>> {
+ protected:
+     TWorkerHolder<IPullListWorker> WorkerHolder_;
+     TOutputConverter<TOutputSpec> Converter_;
+
+ public:
+     explicit TRawProtoListImpl(const TOutputSpec& outputSpec, TWorkerHolder<IPullListWorker> worker)
+         : WorkerHolder_(std::move(worker))
+         , Converter_(outputSpec, WorkerHolder_.Get())
+     {
+     }
+
+ public:
+     /**
+      * Advances the worker's output iterator under the worker's scoped alloc and converts the value it
+      * produced. Returns the spec's StreamSentinel when the iterator has no more values.
+      */
+     OutputItemType<TOutputSpec> Fetch() override {
+         TBindTerminator terminatorGuard(WorkerHolder_->GetGraph().GetTerminator());
+
+         with_lock(WorkerHolder_->GetScopedAlloc()) {
+             TUnboxedValue item;
+
+             if (WorkerHolder_->GetOutputIterator().Next(item)) {
+                 return Converter_.DoConvert(item);
+             }
+
+             return TOutputSpecTraits<TOutputSpec>::StreamSentinel;
+         }
+     }
+ };
+
+ /**
+ * Push relay used to convert generated unboxed value to a message and push it to the user's consumer.
+ *
+ * Calls into the user's consumer are made under Unguard() of the worker's scoped alloc; the conversion
+ * itself happens before the unguard.
+ */
+ template <typename TOutputSpec>
+ class TPushRelayImpl: public IConsumer<const TUnboxedValue*> {
+ private:
+ THolder<IConsumer<OutputItemType<TOutputSpec>>> Underlying_; // the user's consumer
+ TOutputConverter<TOutputSpec> Converter_;
+ IWorker* Worker_;
+
+ public:
+ TPushRelayImpl(
+ const TOutputSpec& outputSpec,
+ IPushStreamWorker* worker,
+ THolder<IConsumer<OutputItemType<TOutputSpec>>> underlying
+ )
+ : Underlying_(std::move(underlying))
+ , Converter_(outputSpec, worker)
+ , Worker_(worker)
+ {
+ }
+
+ // If you've read the comment in TProtoListValue's destructor, you may be wondering why we don't do the
+ // same trick here. Well, that's because in push mode, the consumer is destroyed before acquiring the scoped
+ // alloc and destroying the computation graph.
+
+ public:
+ void OnObject(const TUnboxedValue* value) override {
+ OutputItemType<TOutputSpec> message = Converter_.DoConvert(*value); // convert first, before the unguard below
+ auto unguard = Unguard(Worker_->GetScopedAlloc());
+ Underlying_->OnObject(message);
+ }
+
+ void OnFinish() override {
+ auto unguard = Unguard(Worker_->GetScopedAlloc());
+ Underlying_->OnFinish();
+ }
+ };
+}
+
+using ConsumerType = TInputSpecTraits<TProtobufRawInputSpec>::TConsumerType; // return type of MakeConsumer() below
+
+void TInputSpecTraits<TProtobufRawInputSpec>::PreparePullStreamWorker(
+ const TProtobufRawInputSpec& inputSpec,
+ IPullStreamWorker* worker,
+ THolder<IStream<Message*>> stream
+) {
+ with_lock(worker->GetScopedAlloc()) { // the list value is created and installed while the scoped alloc is held
+ worker->SetInput(
+ worker->GetGraph().GetHolderFactory().Create<TProtoListValue>(inputSpec, std::move(stream), worker), 0); // wraps the message stream; installed as input #0
+ }
+}
+
+void TInputSpecTraits<TProtobufRawInputSpec>::PreparePullListWorker(
+ const TProtobufRawInputSpec& inputSpec,
+ IPullListWorker* worker,
+ THolder<IStream<Message*>> stream
+) {
+ with_lock(worker->GetScopedAlloc()) { // same steps as PreparePullStreamWorker, for a pull-list worker
+ worker->SetInput(
+ worker->GetGraph().GetHolderFactory().Create<TProtoListValue>(inputSpec, std::move(stream), worker), 0); // wraps the message stream; installed as input #0
+ }
+}
+
+ConsumerType TInputSpecTraits<TProtobufRawInputSpec>::MakeConsumer(
+ const TProtobufRawInputSpec& inputSpec,
+ TWorkerHolder<IPushStreamWorker> worker
+) {
+ return MakeHolder<TProtoConsumerImpl>(inputSpec, std::move(worker)); // the consumer takes ownership of the worker holder
+}
+
+template <typename TOutputSpec>
+using PullStreamReturnType = typename TOutputSpecTraits<TOutputSpec>::TPullStreamReturnType; // shorthand used by the definitions below
+template <typename TOutputSpec>
+using PullListReturnType = typename TOutputSpecTraits<TOutputSpec>::TPullListReturnType; // shorthand used by the definitions below
+
+PullStreamReturnType<TProtobufRawOutputSpec> TOutputSpecTraits<TProtobufRawOutputSpec>::ConvertPullStreamWorkerToOutputType(
+ const TProtobufRawOutputSpec& outputSpec,
+ TWorkerHolder<IPullStreamWorker> worker
+) {
+ return MakeHolder<TRawProtoStreamImpl<TProtobufRawOutputSpec>>(outputSpec, std::move(worker)); // message stream that owns the worker
+}
+
+PullListReturnType<TProtobufRawOutputSpec> TOutputSpecTraits<TProtobufRawOutputSpec>::ConvertPullListWorkerToOutputType(
+ const TProtobufRawOutputSpec& outputSpec,
+ TWorkerHolder<IPullListWorker> worker
+) {
+ return MakeHolder<TRawProtoListImpl<TProtobufRawOutputSpec>>(outputSpec, std::move(worker)); // message stream that owns the worker
+}
+
+void TOutputSpecTraits<TProtobufRawOutputSpec>::SetConsumerToWorker(
+ const TProtobufRawOutputSpec& outputSpec,
+ IPushStreamWorker* worker,
+ THolder<IConsumer<TOutputItemType>> consumer
+) {
+ worker->SetConsumer(MakeHolder<TPushRelayImpl<TProtobufRawOutputSpec>>(outputSpec, worker, std::move(consumer))); // the relay converts values and forwards them to the user's consumer
+}
+
+PullStreamReturnType<TProtobufRawMultiOutputSpec> TOutputSpecTraits<TProtobufRawMultiOutputSpec>::ConvertPullStreamWorkerToOutputType(
+ const TProtobufRawMultiOutputSpec& outputSpec,
+ TWorkerHolder<IPullStreamWorker> worker
+) {
+ return MakeHolder<TRawProtoStreamImpl<TProtobufRawMultiOutputSpec>>(outputSpec, std::move(worker)); // stream of {output index, message} pairs that owns the worker
+}
+
+PullListReturnType<TProtobufRawMultiOutputSpec> TOutputSpecTraits<TProtobufRawMultiOutputSpec>::ConvertPullListWorkerToOutputType(
+ const TProtobufRawMultiOutputSpec& outputSpec,
+ TWorkerHolder<IPullListWorker> worker
+) {
+ return MakeHolder<TRawProtoListImpl<TProtobufRawMultiOutputSpec>>(outputSpec, std::move(worker)); // stream of {output index, message} pairs that owns the worker
+}
+
+void TOutputSpecTraits<TProtobufRawMultiOutputSpec>::SetConsumerToWorker(
+ const TProtobufRawMultiOutputSpec& outputSpec,
+ IPushStreamWorker* worker,
+ THolder<IConsumer<TOutputItemType>> consumer
+) {
+ worker->SetConsumer(MakeHolder<TPushRelayImpl<TProtobufRawMultiOutputSpec>>(outputSpec, worker, std::move(consumer))); // the relay converts values and forwards them to the user's consumer
+}
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.h b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.h
new file mode 100644
index 0000000000..2a8fd19648
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.h
@@ -0,0 +1,257 @@
+#pragma once
+
+#include <ydb/library/yql/public/purecalc/common/interface.h>
+#include <ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.h>
+
+#include <google/protobuf/message.h>
+
+#include <util/generic/maybe.h>
+
+namespace NYql {
+ namespace NPureCalc {
+ /**
+ * Processing mode for working with raw protobuf message inputs.
+ *
+ * In this mode purecalc accepts pointers to abstract protobuf messages and processes them using the reflection
+ * mechanism. All passed messages should have the same descriptor (the one you pass to the constructor
+ * of the input spec).
+ *
+ * All working modes are implemented. In pull stream and pull list modes a program would accept a single object
+ * stream of const protobuf messages. In push mode, a program will return a consumer of const protobuf messages.
+ *
+ * The program synopsis follows:
+ *
+ * @code
+ * ... TPullStreamProgram::Apply(IStream<google::protobuf::Message*>);
+ * ... TPullListProgram::Apply(IStream<google::protobuf::Message*>);
+ * TConsumer<google::protobuf::Message*> TPushStreamProgram::Apply(...);
+ * @endcode
+ */
+ class TProtobufRawInputSpec: public TInputSpecBase {
+ private:
+ const google::protobuf::Descriptor& Descriptor_; // held by reference: the descriptor must outlive this spec
+ const TMaybe<TString> TimestampColumn_;
+ const TProtoSchemaOptions SchemaOptions_;
+ mutable TVector<NYT::TNode> SavedSchemas_; // mutable so that the const GetSchemas() may fill it; presumably a lazily built cache - confirm in spec.cpp
+
+ public:
+ /**
+ * Build input spec and associate the given message descriptor.
+ *
+ * @param descriptor descriptor shared by all input messages.
+ * @param timestampColumn optional name of a timestamp column; not set by default.
+ * @param options options that customize input struct type building; default-constructed by default.
+ */
+ explicit TProtobufRawInputSpec(
+ const google::protobuf::Descriptor& descriptor,
+ const TMaybe<TString>& timestampColumn = Nothing(),
+ const TProtoSchemaOptions& options = {}
+ );
+
+ public:
+ const TVector<NYT::TNode>& GetSchemas() const override;
+
+ /**
+ * Get the descriptor associated with this spec.
+ */
+ const google::protobuf::Descriptor& GetDescriptor() const;
+
+ const TMaybe<TString>& GetTimestampColumn() const; // optional timestamp column name; see the constructor's timestampColumn argument
+
+ /**
+ * Get options that customize input struct type building.
+ */
+ const TProtoSchemaOptions& GetSchemaOptions() const;
+ };
+
+ /**
+ * Processing mode for working with raw protobuf message outputs.
+ *
+ * In this mode purecalc yields pointers to abstract protobuf messages. All generated messages share the same
+ * descriptor so they can be safely converted into an appropriate message type.
+ *
+ * Note that one should not expect that the returned pointer will be valid forever; it can (and will) become
+ * outdated once a new output is requested/pushed.
+ *
+ * All working modes are implemented. In pull stream and pull list modes a program will return an object
+ * stream of non-const protobuf messages. In push mode, it will accept a single consumer of non-const
+ * messages.
+ *
+ * The program synopsis follows:
+ *
+ * @code
+ * IStream<google::protobuf::Message*> TPullStreamProgram::Apply(...);
+ * IStream<google::protobuf::Message*> TPullListProgram::Apply(...);
+ * ... TPushStreamProgram::Apply(TConsumer<google::protobuf::Message*>);
+ * @endcode
+ */
+ class TProtobufRawOutputSpec: public TOutputSpecBase {
+ private:
+ const google::protobuf::Descriptor& Descriptor_; // held by reference: the descriptor must outlive this spec
+ google::protobuf::MessageFactory* Factory_; // may be null, meaning "use the default factory" (see SetFactory)
+ TProtoSchemaOptions SchemaOptions_;
+ google::protobuf::Arena* Arena_; // may be null, meaning "create messages on the heap" (see SetArena)
+ mutable TMaybe<NYT::TNode> SavedSchema_; // mutable so that the const GetSchema() may fill it; presumably a lazily built cache - confirm in spec.cpp
+
+ public:
+ /**
+ * Build output spec and associate the given message descriptor and maybe the given message factory.
+ *
+ * The unnamed second parameter is the message factory (null by default); `options` customizes output
+ * struct type building and `arena` is the arena for generated messages (null by default).
+ */
+ explicit TProtobufRawOutputSpec(
+ const google::protobuf::Descriptor& descriptor,
+ google::protobuf::MessageFactory* = nullptr,
+ const TProtoSchemaOptions& options = {},
+ google::protobuf::Arena* arena = nullptr
+ );
+
+ public:
+ const NYT::TNode& GetSchema() const override;
+
+ /**
+ * Get the descriptor associated with this spec.
+ */
+ const google::protobuf::Descriptor& GetDescriptor() const;
+
+ /**
+ * Set a new message factory which will be used to generate messages. Pass a null pointer to use the
+ * default factory.
+ */
+ void SetFactory(google::protobuf::MessageFactory*);
+
+ /**
+ * Get the message factory which is currently associated with this spec.
+ */
+ google::protobuf::MessageFactory* GetFactory() const;
+
+ /**
+ * Set a new arena which will be used to generate messages. Pass a null pointer to create on the heap.
+ */
+ void SetArena(google::protobuf::Arena*);
+
+ /**
+ * Get the arena which is currently associated with this spec.
+ */
+ google::protobuf::Arena* GetArena() const;
+
+ /**
+ * Get options that customize output struct type building.
+ */
+ const TProtoSchemaOptions& GetSchemaOptions() const;
+ };
+
+ /**
+ * Processing mode for working with raw protobuf messages and several outputs.
+ *
+ * Every output item is a pair: the index of the output and a pointer to the message produced for it.
+ *
+ * The program synopsis follows:
+ *
+ * @code
+ * IStream<std::pair<ui32, google::protobuf::Message*>> TPullStreamProgram::Apply(...);
+ * IStream<std::pair<ui32, google::protobuf::Message*>> TPullListProgram::Apply(...);
+ * ... TPushStreamProgram::Apply(TConsumer<std::pair<ui32, google::protobuf::Message*>>);
+ * @endcode
+ */
+ class TProtobufRawMultiOutputSpec: public TOutputSpecBase {
+ private:
+ TVector<const google::protobuf::Descriptor*> Descriptors_; // one descriptor per output
+ TVector<google::protobuf::MessageFactory*> Factories_; // presumably indexed by output like Descriptors_ - confirm in spec.cpp
+ const TProtoSchemaOptions SchemaOptions_;
+ TVector<google::protobuf::Arena*> Arenas_; // presumably indexed by output like Descriptors_ - confirm in spec.cpp
+ mutable NYT::TNode SavedSchema_; // mutable so that the const GetSchema() may fill it; presumably a lazily built cache - confirm in spec.cpp
+
+ public:
+ TProtobufRawMultiOutputSpec(
+ TVector<const google::protobuf::Descriptor*>, // descriptors of the outputs
+ TMaybe<TVector<google::protobuf::MessageFactory*>> = {}, // optional message factories; not set by default
+ const TProtoSchemaOptions& options = {},
+ TMaybe<TVector<google::protobuf::Arena*>> arenas = {} // optional arenas; not set by default
+ );
+
+ public:
+ const NYT::TNode& GetSchema() const override;
+
+ /**
+ * Get the descriptor associated with given output.
+ */
+ const google::protobuf::Descriptor& GetDescriptor(ui32) const;
+
+ /**
+ * Set a new message factory for given output. It will be used to generate messages for this output.
+ */
+ void SetFactory(ui32, google::protobuf::MessageFactory*);
+
+ /**
+ * Get the message factory which is currently associated with given output.
+ */
+ google::protobuf::MessageFactory* GetFactory(ui32) const;
+
+ /**
+ * Set a new arena for given output. It will be used to generate messages for this output.
+ */
+ void SetArena(ui32, google::protobuf::Arena*);
+
+ /**
+ * Get the arena which is currently associated with given output.
+ */
+ google::protobuf::Arena* GetArena(ui32) const;
+
+ /**
+ * Get number of outputs for this spec.
+ */
+ ui32 GetOutputsNumber() const;
+
+ /**
+ * Get options that customize output struct type building.
+ */
+ const TProtoSchemaOptions& GetSchemaOptions() const;
+ };
+
+ template <>
+ struct TInputSpecTraits<TProtobufRawInputSpec> { // traits binding the raw protobuf input spec to the worker kinds
+ static const constexpr bool IsPartial = false;
+
+ static const constexpr bool SupportPullStreamMode = true; // all three working modes are supported
+ static const constexpr bool SupportPullListMode = true;
+ static const constexpr bool SupportPushStreamMode = true;
+
+ using TConsumerType = THolder<IConsumer<google::protobuf::Message*>>; // what MakeConsumer() returns in push mode
+
+ static void PreparePullStreamWorker(const TProtobufRawInputSpec&, IPullStreamWorker*, THolder<IStream<google::protobuf::Message*>>);
+ static void PreparePullListWorker(const TProtobufRawInputSpec&, IPullListWorker*, THolder<IStream<google::protobuf::Message*>>);
+ static TConsumerType MakeConsumer(const TProtobufRawInputSpec&, TWorkerHolder<IPushStreamWorker>);
+ };
+
+ template <>
+ struct TOutputSpecTraits<TProtobufRawOutputSpec> { // traits binding the raw protobuf output spec to the worker kinds
+ static const constexpr bool IsPartial = false;
+
+ static const constexpr bool SupportPullStreamMode = true; // all three working modes are supported
+ static const constexpr bool SupportPullListMode = true;
+ static const constexpr bool SupportPushStreamMode = true;
+
+ using TOutputItemType = google::protobuf::Message*;
+ using TPullStreamReturnType = THolder<IStream<TOutputItemType>>;
+ using TPullListReturnType = THolder<IStream<TOutputItemType>>;
+
+ static const constexpr TOutputItemType StreamSentinel = nullptr; // value used to signal the end of an output stream
+
+ static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const TProtobufRawOutputSpec&, TWorkerHolder<IPullStreamWorker>);
+ static TPullListReturnType ConvertPullListWorkerToOutputType(const TProtobufRawOutputSpec&, TWorkerHolder<IPullListWorker>);
+ static void SetConsumerToWorker(const TProtobufRawOutputSpec&, IPushStreamWorker*, THolder<IConsumer<TOutputItemType>>);
+ };
+
+ template <>
+ struct TOutputSpecTraits<TProtobufRawMultiOutputSpec> { // traits binding the raw protobuf multi-output spec to the worker kinds
+ static const constexpr bool IsPartial = false;
+
+ static const constexpr bool SupportPullStreamMode = true; // all three working modes are supported
+ static const constexpr bool SupportPullListMode = true;
+ static const constexpr bool SupportPushStreamMode = true;
+
+ using TOutputItemType = std::pair<ui32, google::protobuf::Message*>; // {output index, message}
+ using TPullStreamReturnType = THolder<IStream<TOutputItemType>>;
+ using TPullListReturnType = THolder<IStream<TOutputItemType>>;
+
+ static const constexpr TOutputItemType StreamSentinel = {0, nullptr}; // value used to signal the end of an output stream
+
+ static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const TProtobufRawMultiOutputSpec&, TWorkerHolder<IPullStreamWorker>);
+ static TPullListReturnType ConvertPullListWorkerToOutputType(const TProtobufRawMultiOutputSpec&, TWorkerHolder<IPullListWorker>);
+ static void SetConsumerToWorker(const TProtobufRawMultiOutputSpec&, IPushStreamWorker*, THolder<IConsumer<TOutputItemType>>);
+ };
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/ya.make b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/ya.make
new file mode 100644
index 0000000000..ad72bbf43a
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/ya.make
@@ -0,0 +1,16 @@
+LIBRARY()
+# Library module for the purecalc protobuf_raw io_specs.
+PEERDIR(
+ ydb/library/yql/public/purecalc/common
+ ydb/library/yql/public/purecalc/helpers/protobuf
+)
+
+SRCS(
+ proto_holder.cpp
+ spec.cpp
+ spec.h
+)
+# NOTE(review): presumably selects the UDF ABI version this module is built against -- confirm in the YQL build macros.
+YQL_LAST_ABI_VERSION()
+
+END()
diff --git a/ydb/library/yql/public/purecalc/io_specs/ut/ya.make b/ydb/library/yql/public/purecalc/io_specs/ut/ya.make
new file mode 100644
index 0000000000..b0179f3af0
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/ut/ya.make
@@ -0,0 +1,4 @@
+RECURSE(
+ ../mkql/ut
+ ../protobuf/ut
+) # lists the unit-test directories of the sibling mkql and protobuf modules
diff --git a/ydb/library/yql/public/purecalc/io_specs/ya.make b/ydb/library/yql/public/purecalc/io_specs/ya.make
new file mode 100644
index 0000000000..c30a69d40b
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/io_specs/ya.make
@@ -0,0 +1,9 @@
+RECURSE(
+ mkql
+ protobuf
+ protobuf_raw
+)
+# ut/ is listed via RECURSE_FOR_TESTS rather than RECURSE (test-only recursion, going by the macro name).
+RECURSE_FOR_TESTS(
+ ut
+)
diff --git a/ydb/library/yql/public/purecalc/purecalc.cpp b/ydb/library/yql/public/purecalc/purecalc.cpp
new file mode 100644
index 0000000000..80cfd39d96
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/purecalc.cpp
@@ -0,0 +1 @@
+#include "purecalc.h"
diff --git a/ydb/library/yql/public/purecalc/purecalc.h b/ydb/library/yql/public/purecalc/purecalc.h
new file mode 100644
index 0000000000..83bd8a7b84
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/purecalc.h
@@ -0,0 +1,3 @@
+#pragma once
+
+#include "common/interface.h"
diff --git a/ydb/library/yql/public/purecalc/ut/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/ut/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..4a4c7b68a2
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,78 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(lib)
+add_subdirectory(protos)
+# Unit-test executable for purecalc; this variant is the darwin-x86_64 one (per the file name).
+add_executable(ydb-library-yql-public-purecalc-ut)
+target_compile_options(ydb-library-yql-public-purecalc-ut PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(ydb-library-yql-public-purecalc-ut PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ cpp-testing-unittest_main
+ yql-public-purecalc
+ purecalc-io_specs-protobuf
+ purecalc-ut-protos
+)
+target_link_options(ydb-library-yql-public-purecalc-ut PRIVATE
+ -Wl,-platform_version,macos,11.0,11.0
+ -fPIC
+ # CMake de-duplicates link options, so "-framework" and its argument must travel as one
+ # SHELL: group; as two separate tokens the pair can be split when another -framework is added.
+ "SHELL:-framework CoreFoundation"
+)
+target_sources(ydb-library-yql-public-purecalc-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/fake_spec.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_schema.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_sexpr.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_sql.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_udf.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_user_data.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_eval.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_pool.cpp
+)
+set_property(
+ TARGET
+ ydb-library-yql-public-purecalc-ut
+ PROPERTY
+ SPLIT_FACTOR
+ 1
+)
+add_yunittest(
+ NAME
+ ydb-library-yql-public-purecalc-ut
+ TEST_TARGET
+ ydb-library-yql-public-purecalc-ut
+ TEST_ARG
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-public-purecalc-ut
+ PROPERTY
+ LABELS
+ MEDIUM
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-public-purecalc-ut
+ PROPERTY
+ PROCESSORS
+ 1
+)
+target_allocator(ydb-library-yql-public-purecalc-ut
+ system_allocator
+)
+vcs_info(ydb-library-yql-public-purecalc-ut)
diff --git a/ydb/library/yql/public/purecalc/ut/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/ut/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..01d22ca88f
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,81 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(lib)
+add_subdirectory(protos)
+# Unit-test executable for purecalc; this variant is the linux-aarch64 one (per the file name).
+add_executable(ydb-library-yql-public-purecalc-ut)
+target_compile_options(ydb-library-yql-public-purecalc-ut PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(ydb-library-yql-public-purecalc-ut PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ cpp-testing-unittest_main
+ yql-public-purecalc
+ purecalc-io_specs-protobuf
+ purecalc-ut-protos
+)
+target_link_options(ydb-library-yql-public-purecalc-ut PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -fPIC
+ -lpthread
+ -lrt
+ -ldl
+)
+target_sources(ydb-library-yql-public-purecalc-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/fake_spec.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_schema.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_sexpr.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_sql.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_udf.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_user_data.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_eval.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_pool.cpp
+)
+set_property(
+ TARGET
+ ydb-library-yql-public-purecalc-ut
+ PROPERTY
+ SPLIT_FACTOR
+ 1
+)
+add_yunittest(
+ NAME
+ ydb-library-yql-public-purecalc-ut
+ TEST_TARGET
+ ydb-library-yql-public-purecalc-ut
+ TEST_ARG
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-public-purecalc-ut
+ PROPERTY
+ LABELS
+ MEDIUM
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-public-purecalc-ut
+ PROPERTY
+ PROCESSORS
+ 1
+)
+target_allocator(ydb-library-yql-public-purecalc-ut
+ cpp-malloc-jemalloc
+)
+vcs_info(ydb-library-yql-public-purecalc-ut)
diff --git a/ydb/library/yql/public/purecalc/ut/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/ut/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..5c64f772cb
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,83 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(lib)
+add_subdirectory(protos)
+# Unit-test executable for purecalc; this variant is the linux-x86_64 one (per the file name).
+add_executable(ydb-library-yql-public-purecalc-ut)
+target_compile_options(ydb-library-yql-public-purecalc-ut PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(ydb-library-yql-public-purecalc-ut PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ cpp-testing-unittest_main
+ yql-public-purecalc
+ purecalc-io_specs-protobuf
+ purecalc-ut-protos
+)
+target_link_options(ydb-library-yql-public-purecalc-ut PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -fPIC
+ -lpthread
+ -lrt
+ -ldl
+)
+target_sources(ydb-library-yql-public-purecalc-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/fake_spec.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_schema.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_sexpr.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_sql.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_udf.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_user_data.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_eval.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_pool.cpp
+)
+set_property(
+ TARGET
+ ydb-library-yql-public-purecalc-ut
+ PROPERTY
+ SPLIT_FACTOR
+ 1
+)
+add_yunittest(
+ NAME
+ ydb-library-yql-public-purecalc-ut
+ TEST_TARGET
+ ydb-library-yql-public-purecalc-ut
+ TEST_ARG
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-public-purecalc-ut
+ PROPERTY
+ LABELS
+ MEDIUM
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-public-purecalc-ut
+ PROPERTY
+ PROCESSORS
+ 1
+)
+target_allocator(ydb-library-yql-public-purecalc-ut
+ cpp-malloc-tcmalloc
+ libs-tcmalloc-no_percpu_cache
+)
+vcs_info(ydb-library-yql-public-purecalc-ut)
diff --git a/ydb/library/yql/public/purecalc/ut/CMakeLists.txt b/ydb/library/yql/public/purecalc/ut/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif() # no else branch: any other platform/CUDA combination includes nothing from this directory
diff --git a/ydb/library/yql/public/purecalc/ut/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/ut/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..eb1ce4c4c3
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,71 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(lib)
+add_subdirectory(protos)
+# Unit-test executable for purecalc; this variant is the windows-x86_64 one (per the file name).
+add_executable(ydb-library-yql-public-purecalc-ut)
+target_compile_options(ydb-library-yql-public-purecalc-ut PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_link_libraries(ydb-library-yql-public-purecalc-ut PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ cpp-testing-unittest_main
+ yql-public-purecalc
+ purecalc-io_specs-protobuf
+ purecalc-ut-protos
+)
+target_sources(ydb-library-yql-public-purecalc-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/fake_spec.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_schema.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_sexpr.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_sql.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_udf.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_user_data.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_eval.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_pool.cpp
+)
+set_property(
+ TARGET
+ ydb-library-yql-public-purecalc-ut
+ PROPERTY
+ SPLIT_FACTOR
+ 1
+)
+add_yunittest(
+ NAME
+ ydb-library-yql-public-purecalc-ut
+ TEST_TARGET
+ ydb-library-yql-public-purecalc-ut
+ TEST_ARG
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-public-purecalc-ut
+ PROPERTY
+ LABELS
+ MEDIUM
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-public-purecalc-ut
+ PROPERTY
+ PROCESSORS
+ 1
+)
+target_allocator(ydb-library-yql-public-purecalc-ut
+ system_allocator
+)
+vcs_info(ydb-library-yql-public-purecalc-ut)
diff --git a/ydb/library/yql/public/purecalc/ut/empty_stream.h b/ydb/library/yql/public/purecalc/ut/empty_stream.h
new file mode 100644
index 0000000000..246aabd423
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/empty_stream.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include <ydb/library/yql/public/purecalc/purecalc.h>
+
+namespace NYql {
+ namespace NPureCalc {
+ template <typename T> // Stream with no items: every Fetch() yields the empty (sentinel-like) value of T.
+ class TEmptyStreamImpl: public IStream<T> {
+ public:
+ T Fetch() override {
+ return T{}; // value-initialised: nullptr for pointer items, {0, nullptr} for (index, pointer) pairs
+ }
+ };
+ // Creates an empty stream and hands it out behind the IStream<T> interface.
+ template <typename T>
+ THolder<IStream<T>> EmptyStream() {
+ return MakeHolder<TEmptyStreamImpl<T>>();
+ }
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/ut/fake_spec.cpp b/ydb/library/yql/public/purecalc/ut/fake_spec.cpp
new file mode 100644
index 0000000000..4e45e76bc1
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/fake_spec.cpp
@@ -0,0 +1,36 @@
+#include "fake_spec.h"
+
+namespace NYql {
+ namespace NPureCalc {
+ // Returns the type node ["StructType", [["Name", ["DataType", "Int32"]]]].
+ NYT::TNode MakeFakeSchema() {
+ auto int32Type = NYT::TNode::CreateList()
+ .Add("DataType")
+ .Add("Int32");
+ auto nameMember = NYT::TNode::CreateList()
+ .Add("Name")
+ .Add(std::move(int32Type));
+
+ auto members = NYT::TNode::CreateList()
+ .Add(std::move(nameMember));
+
+ return NYT::TNode::CreateList()
+ .Add("StructType")
+ .Add(std::move(members));
+ }
+
+ // Input spec holding inputsNumber copies of MakeFakeSchema().
+ TFakeInputSpec FakeIS(ui32 inputsNumber) {
+ TFakeInputSpec result{};
+ result.Schemas.assign(inputsNumber, MakeFakeSchema());
+ return result;
+ }
+
+ // Output spec whose schema is MakeFakeSchema().
+ TFakeOutputSpec FakeOS() {
+ TFakeOutputSpec result{};
+ result.Schema = MakeFakeSchema();
+ return result;
+ }
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/ut/fake_spec.h b/ydb/library/yql/public/purecalc/ut/fake_spec.h
new file mode 100644
index 0000000000..0b0e9e02ec
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/fake_spec.h
@@ -0,0 +1,54 @@
+#pragma once
+
+#include <ydb/library/yql/public/purecalc/purecalc.h>
+
+namespace NYql {
+ namespace NPureCalc {
+ class TFakeInputSpec: public TInputSpecBase { // Input spec stub: stores schemas and returns them as-is.
+ public:
+ TVector<NYT::TNode> Schemas = {NYT::TNode::CreateList()}; // default: one schema, an empty list node
+
+ public:
+ const TVector<NYT::TNode>& GetSchemas() const override {
+ return Schemas;
+ }
+ };
+
+ class TFakeOutputSpec: public TOutputSpecBase { // Output spec stub: stores one schema and returns it as-is.
+ public:
+ NYT::TNode Schema = NYT::TNode::CreateList(); // default: an empty list node
+
+ public:
+ const NYT::TNode& GetSchema() const override {
+ return Schema;
+ }
+ };
+ // Traits of the fake input spec: every mode flag is false and the consumer type is void.
+ template <>
+ struct TInputSpecTraits<TFakeInputSpec> {
+ static const constexpr bool IsPartial = false;
+
+ static const constexpr bool SupportPullStreamMode = false;
+ static const constexpr bool SupportPullListMode = false;
+ static const constexpr bool SupportPushStreamMode = false;
+
+ using TConsumerType = void;
+ };
+ // Traits of the fake output spec: every mode flag is false and both pull return types are void.
+ template <>
+ struct TOutputSpecTraits<TFakeOutputSpec> {
+ static const constexpr bool IsPartial = false;
+
+ static const constexpr bool SupportPullStreamMode = false;
+ static const constexpr bool SupportPullListMode = false;
+ static const constexpr bool SupportPushStreamMode = false;
+
+ using TPullStreamReturnType = void;
+ using TPullListReturnType = void;
+ };
+ // Factory helpers; their definitions live in fake_spec.cpp.
+ NYT::TNode MakeFakeSchema();
+ TFakeInputSpec FakeIS(ui32 inputsNumber = 1);
+ TFakeOutputSpec FakeOS();
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..36bfa6b4d5
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(purecalc-ut-lib) # helper library of the purecalc unit tests; its single source is attached below
+target_link_libraries(purecalc-ut-lib PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-yson
+ cpp-yson-node
+)
+target_sources(purecalc-ut-lib PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/lib/helpers.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..b32b5970c1
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(purecalc-ut-lib) # helper library of the purecalc unit tests; its single source is attached below
+target_link_libraries(purecalc-ut-lib PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-yson
+ cpp-yson-node
+)
+target_sources(purecalc-ut-lib PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/lib/helpers.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..b32b5970c1
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(purecalc-ut-lib) # helper library of the purecalc unit tests; its single source is attached below
+target_link_libraries(purecalc-ut-lib PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-yson
+ cpp-yson-node
+)
+target_sources(purecalc-ut-lib PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/lib/helpers.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.txt b/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif() # no else branch: any other platform/CUDA combination includes nothing from this directory
diff --git a/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..36bfa6b4d5
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(purecalc-ut-lib) # helper library of the purecalc unit tests; its single source is attached below
+target_link_libraries(purecalc-ut-lib PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-yson
+ cpp-yson-node
+)
+target_sources(purecalc-ut-lib PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/lib/helpers.cpp
+)
diff --git a/ydb/library/yql/public/purecalc/ut/lib/helpers.cpp b/ydb/library/yql/public/purecalc/ut/lib/helpers.cpp
new file mode 100644
index 0000000000..cef9a99523
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/lib/helpers.cpp
@@ -0,0 +1,55 @@
+#include "helpers.h"
+
+#include <library/cpp/yson/writer.h>
+
+#include <library/cpp/yson/node/node_visitor.h>
+
+#include <util/string/ascii.h>
+#include <util/generic/hash_set.h>
+
+
+namespace NYql {
+ namespace NPureCalc {
+ namespace NPrivate {
+ // Builds ["StructType", [[name, type], ...]] with one member per entry of `fields`.
+ // A member's type is ["DataType", <field name with its first character upper-cased>];
+ // members named in `optionalFields` get that type wrapped as ["OptionalType", type].
+ NYT::TNode GetSchema(
+ const TVector<TString>& fields,
+ const TVector<TString>& optionalFields
+ ) {
+ // Set form of optionalFields for membership tests.
+ const THashSet<TString> optionalNames {
+ optionalFields.begin(), optionalFields.end()
+ };
+
+ auto structMembers = NYT::TNode::CreateList();
+
+ for (const auto& fieldName: fields) {
+ // The data type name is derived from the field name itself.
+ TString dataTypeName {fieldName};
+ dataTypeName[0] = AsciiToUpper(dataTypeName[0]);
+
+ auto memberType = NYT::TNode::CreateList()
+ .Add("DataType")
+ .Add(dataTypeName);
+
+ if (optionalNames.contains(fieldName)) {
+ memberType = NYT::TNode::CreateList()
+ .Add("OptionalType")
+ .Add(memberType);
+ }
+ // Each struct member is a [name, type] pair.
+ structMembers.Add(NYT::TNode::CreateList()
+ .Add(fieldName)
+ .Add(memberType)
+ );
+ }
+
+ return NYT::TNode::CreateList()
+ .Add("StructType")
+ .Add(structMembers);
+ }
+ }
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/ut/lib/helpers.h b/ydb/library/yql/public/purecalc/ut/lib/helpers.h
new file mode 100644
index 0000000000..53a22661ec
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/lib/helpers.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include <library/cpp/yson/node/node.h>
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+#include <util/stream/str.h>
+
+
+namespace NYql {
+ namespace NPureCalc {
+ namespace NPrivate {
+ NYT::TNode GetSchema( // returns a ["StructType", members] node; defined in helpers.cpp
+ const TVector<TString>& fields, // member names; each name also determines its data type name
+ const TVector<TString>& optionalFields = {} // names whose type gets wrapped in "OptionalType"
+ );
+ }
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/ut/lib/ya.make b/ydb/library/yql/public/purecalc/ut/lib/ya.make
new file mode 100644
index 0000000000..df3ba8eab2
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/lib/ya.make
@@ -0,0 +1,13 @@
+LIBRARY()
+# Helper library of the purecalc unit tests; depends on the YSON libraries listed below.
+PEERDIR(
+ library/cpp/yson
+ library/cpp/yson/node
+)
+
+SRCS(
+ helpers.cpp
+ helpers.h
+)
+
+END()
diff --git a/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..12f10544f8
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,43 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+# Library target whose only proto input is test_structs.proto.
+add_library(purecalc-ut-protos)
+target_link_libraries(purecalc-ut-protos PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(purecalc-ut-protos PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/protos/test_structs.proto
+)
+target_proto_addincls(purecalc-ut-protos
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(purecalc-ut-protos
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..806ee80165
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,44 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+# Library target whose only proto input is test_structs.proto.
+add_library(purecalc-ut-protos)
+target_link_libraries(purecalc-ut-protos PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(purecalc-ut-protos PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/protos/test_structs.proto
+)
+target_proto_addincls(purecalc-ut-protos
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(purecalc-ut-protos
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..806ee80165
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,44 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+# Library target whose only proto input is test_structs.proto.
+add_library(purecalc-ut-protos)
+target_link_libraries(purecalc-ut-protos PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(purecalc-ut-protos PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/protos/test_structs.proto
+)
+target_proto_addincls(purecalc-ut-protos
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(purecalc-ut-protos
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.txt b/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif() # no else branch: any other platform/CUDA combination includes nothing from this directory
diff --git a/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..12f10544f8
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,43 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+# Library target whose only proto input is test_structs.proto.
+add_library(purecalc-ut-protos)
+target_link_libraries(purecalc-ut-protos PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(purecalc-ut-protos PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/protos/test_structs.proto
+)
+target_proto_addincls(purecalc-ut-protos
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(purecalc-ut-protos
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/ydb/library/yql/public/purecalc/ut/protos/test_structs.proto b/ydb/library/yql/public/purecalc/ut/protos/test_structs.proto
new file mode 100644
index 0000000000..66593005a5
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/protos/test_structs.proto
@@ -0,0 +1,122 @@
+syntax = "proto2"; // stated explicitly: the required/optional labels in this file are proto2, and protoc warns when no syntax is given
+package NPureCalcProto;
+message TUnparsed { // single required string; used as the input row type in test_udf.cpp
+ required string S = 1;
+}
+
+message TParsed { // three int32 columns, B optional; used as the output row type in test_udf.cpp
+ required int32 A = 1;
+ optional int32 B = 2;
+ required int32 C = 3;
+}
+
+message TPartial { // single required int32 X
+ required int32 X = 1;
+}
+
+message TSimpleMessage { // single required int32 X (same shape as TPartial)
+ required int32 X = 1;
+}
+
+message TNamedSimpleMessage { // TSimpleMessage plus a required bytes Name
+ required int32 X = 1;
+ required bytes Name = 2;
+}
+
+message TStringMessage { // single required string X; row type in test_eval.cpp, test_pool.cpp and test_user_data.cpp
+ required string X = 1;
+}
+
+message TAllTypes { // one required field for each of the 15 protobuf scalar types
+ required double FDouble = 1;
+ required float FFloat = 2;
+ required int64 FInt64 = 3;
+ required sfixed64 FSfixed64 = 4;
+ required sint64 FSint64 = 5;
+ required uint64 FUint64 = 6;
+ required fixed64 FFixed64 = 7;
+ required int32 FInt32 = 8;
+ required sfixed32 FSfixed32 = 9;
+ required sint32 FSint32 = 10;
+ required uint32 FUint32 = 11;
+ required fixed32 FFixed32 = 12;
+ required bool FBool = 13;
+ required string FString = 14;
+ required bytes FBytes = 15;
+}
+
+message TOptionalAllTypes { // same names, types and numbers as TAllTypes, but every field is optional
+ optional double FDouble = 1;
+ optional float FFloat = 2;
+ optional int64 FInt64 = 3;
+ optional sfixed64 FSfixed64 = 4;
+ optional sint64 FSint64 = 5;
+ optional uint64 FUint64 = 6;
+ optional fixed64 FFixed64 = 7;
+ optional int32 FInt32 = 8;
+ optional sfixed32 FSfixed32 = 9;
+ optional sint32 FSint32 = 10;
+ optional uint32 FUint32 = 11;
+ optional fixed32 FFixed32 = 12;
+ optional bool FBool = 13;
+ optional string FString = 14;
+ optional bytes FBytes = 15;
+}
+
+message TSimpleNested { // scalar plus a required nested TAllTypes
+ required int32 X = 1;
+ required TAllTypes Y = 2;
+}
+
+message TOptionalNested { // optional nested TAllTypes only
+ optional TAllTypes X = 1;
+}
+
+message TSimpleRepeated { // scalar plus a repeated scalar
+ required int32 X = 1;
+ repeated int32 Y = 2;
+}
+
+message TNestedRepeated { // scalar plus a repeated nested message
+ required int32 X = 1;
+ repeated TSimpleNested Y = 2;
+}
+
+message TRecursive { // refers to itself directly through the required field Nested
+ required int32 X = 1;
+ required TRecursive Nested = 2;
+}
+
+message TRecursiveIndirectly { // refers to itself through the inner message TNested
+ message TNested {
+ required TRecursiveIndirectly Nested = 1;
+ }
+
+ required int32 X = 1;
+ repeated TNested Nested = 2;
+}
+
+message TMessageWithEnum { // repeated field of a locally declared two-value enum
+ enum ETestEnum {
+ VALUE1 = 0;
+ VALUE2 = 1;
+ }
+ repeated ETestEnum EnumValue = 1;
+}
+
+message TUnsplitted { // NOTE(review): presumably the combined row that TSplitted1/TSplitted2 are derived from - confirm against the tests using it
+ required int32 AInt = 1;
+ required uint32 AUint = 2;
+ required string AString = 3;
+ optional bool ABool = 4;
+}
+
+message TSplitted1 { // int32 + string pair with B-prefixed field names
+ required int32 BInt = 1;
+ required string BString = 2;
+}
+
+message TSplitted2 { // uint32 + string pair with C-prefixed field names
+ required uint32 CUint = 1;
+ required string CString = 2;
+}
diff --git a/ydb/library/yql/public/purecalc/ut/protos/ya.make b/ydb/library/yql/public/purecalc/ut/protos/ya.make
new file mode 100644
index 0000000000..a455ff2fba
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/protos/ya.make
@@ -0,0 +1,9 @@
+PROTO_LIBRARY()
+# Protobuf messages shared by the purecalc unit tests (included there as test_structs.pb.h).
+SRCS(
+ test_structs.proto
+)
+# NOTE(review): presumably switches off Go code generation for this library - confirm against the ya.make documentation.
+EXCLUDE_TAGS(GO_PROTO)
+
+END()
diff --git a/ydb/library/yql/public/purecalc/ut/test_eval.cpp b/ydb/library/yql/public/purecalc/ut/test_eval.cpp
new file mode 100644
index 0000000000..a556b47b03
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/test_eval.cpp
@@ -0,0 +1,30 @@
+#include <ydb/library/yql/public/purecalc/purecalc.h>
+#include <ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h>
+#include <ydb/library/yql/public/purecalc/ut/protos/test_structs.pb.h>
+#include <ydb/library/yql/public/purecalc/ut/empty_stream.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+Y_UNIT_TEST_SUITE(TestEval) {
+    // The query has no FROM clause: exactly one row is expected although the input stream is empty.
+    Y_UNIT_TEST(TestEvalExpr) {
+        using namespace NYql::NPureCalc;
+
+        auto factory = MakeProgramFactory(TProgramFactoryOptions());
+
+        auto program = factory->MakePullListProgram(
+            TProtobufInputSpec<NPureCalcProto::TStringMessage>(),
+            TProtobufOutputSpec<NPureCalcProto::TStringMessage>(),
+            "SELECT Unwrap(cast(EvaluateExpr('foo' || 'bar') as Utf8)) AS X",
+            ETranslationMode::SQL
+        );
+
+        auto stream = program->Apply(EmptyStream<NPureCalcProto::TStringMessage*>());
+        // Fetch into an initialised local instead of assigning inside the assertion macro.
+        auto* message = stream->Fetch();
+        UNIT_ASSERT(message);
+        // VALUES_EQUAL reports both operands on failure, matching the style used in test_udf.cpp.
+        UNIT_ASSERT_VALUES_EQUAL(message->GetX(), "foobar");
+        // The stream must be exhausted after that single row.
+        UNIT_ASSERT(!stream->Fetch());
+    }
+}
diff --git a/ydb/library/yql/public/purecalc/ut/test_pool.cpp b/ydb/library/yql/public/purecalc/ut/test_pool.cpp
new file mode 100644
index 0000000000..8c80ae9c84
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/test_pool.cpp
@@ -0,0 +1,184 @@
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <ydb/library/yql/public/purecalc/common/interface.h>
+#include <ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h>
+#include <ydb/library/yql/public/purecalc/ut/protos/test_structs.pb.h>
+#include <library/cpp/protobuf/util/pb_io.h>
+
+#include <util/string/cast.h>
+
+using namespace NYql::NPureCalc;
+
+namespace {
+    // Input stream that yields three messages with X = "0", "1", "2" and then nullptr.
+    class TStringMessageStreamImpl: public IStream<NPureCalcProto::TStringMessage*> {
+    private:
+        ui32 I_ = 0;
+        NPureCalcProto::TStringMessage Message_{};
+
+    public:
+        // Hands out a pointer to the same internal message every time; its content is overwritten per call.
+        NPureCalcProto::TStringMessage* Fetch() override {
+            if (I_ >= 3) {
+                return nullptr;
+            }
+            Message_.SetX(ToString(I_));
+            ++I_;
+            return &Message_;
+        }
+    };
+
+    // Consumer that appends the X field of every received message to a caller-owned vector.
+    class TStringMessageConsumerImpl: public IConsumer<NPureCalcProto::TStringMessage*> {
+    private:
+        TVector<TString>* Buf_;
+
+    public:
+        // explicit: a bare TVector<TString>* must not convert to a consumer implicitly.
+        explicit TStringMessageConsumerImpl(TVector<TString>* buf)
+            : Buf_(buf)
+        {
+        }
+
+        void OnObject(NPureCalcProto::TStringMessage* t) override {
+            Buf_->push_back(t->GetX());
+        }
+        // Nothing to flush: every object is stored as soon as it arrives.
+        void OnFinish() override {
+        }
+    };
+}
+
+Y_UNIT_TEST_SUITE(TestWorkerPool) {
+ static TString sql = "SELECT 'abc'u || X AS X FROM Input";
+
+ static TVector<TString> expected{"abc0", "abc1", "abc2"};
+
+    // Compiles `sql` as a pull-stream program and checks its output while reusing the same program several times.
+    void TestPullStreamImpl(bool useWorkerPool) {
+        auto options = TProgramFactoryOptions().SetUseWorkerPool(useWorkerPool);
+        auto factory = MakeProgramFactory(options);
+        auto program = factory->MakePullStreamProgram(
+            TProtobufInputSpec<NPureCalcProto::TStringMessage>(),
+            TProtobufOutputSpec<NPureCalcProto::TStringMessage>(),
+            sql,
+            ETranslationMode::SQL
+        );
+
+        auto drainAndVerify = [](IStream<NPureCalcProto::TStringMessage*>* stream) {
+            TVector<TString> rows;
+            for (auto* msg = stream->Fetch(); msg != nullptr; msg = stream->Fetch()) {
+                rows.push_back(msg->GetX());
+            }
+            UNIT_ASSERT_VALUES_EQUAL(expected, rows);
+        };
+
+        // One application at a time, twice in a row.
+        for (size_t round = 0; round != 2; ++round) {
+            auto out = program->Apply(MakeHolder<TStringMessageStreamImpl>());
+            drainAndVerify(out.Get());
+        }
+        // Two applications alive at once; both are created before either is drained.
+        {
+            auto first = program->Apply(MakeHolder<TStringMessageStreamImpl>());
+            auto second = program->Apply(MakeHolder<TStringMessageStreamImpl>());
+            drainAndVerify(first.Get());
+            drainAndVerify(second.Get());
+        }
+    }
+
+ Y_UNIT_TEST(TestPullStreamUseWorkerPool) { // pull-stream scenario with SetUseWorkerPool(true)
+ TestPullStreamImpl(true);
+ }
+
+ Y_UNIT_TEST(TestPullStreamNoWorkerPool) { // same scenario with SetUseWorkerPool(false)
+ TestPullStreamImpl(false);
+ }
+
+    // Compiles `sql` as a pull-list program and checks its output while reusing the same program several times.
+    void TestPullListImpl(bool useWorkerPool) {
+        auto options = TProgramFactoryOptions().SetUseWorkerPool(useWorkerPool);
+        auto factory = MakeProgramFactory(options);
+        auto program = factory->MakePullListProgram(
+            TProtobufInputSpec<NPureCalcProto::TStringMessage>(),
+            TProtobufOutputSpec<NPureCalcProto::TStringMessage>(),
+            sql,
+            ETranslationMode::SQL
+        );
+
+        auto drainAndVerify = [](IStream<NPureCalcProto::TStringMessage*>* stream) {
+            TVector<TString> rows;
+            for (auto* msg = stream->Fetch(); msg != nullptr; msg = stream->Fetch()) {
+                rows.push_back(msg->GetX());
+            }
+            UNIT_ASSERT_VALUES_EQUAL(expected, rows);
+        };
+
+        // One application at a time, twice in a row.
+        for (size_t round = 0; round != 2; ++round) {
+            auto out = program->Apply(MakeHolder<TStringMessageStreamImpl>());
+            drainAndVerify(out.Get());
+        }
+        // Two applications alive at once; both are created before either is drained.
+        {
+            auto first = program->Apply(MakeHolder<TStringMessageStreamImpl>());
+            auto second = program->Apply(MakeHolder<TStringMessageStreamImpl>());
+            drainAndVerify(first.Get());
+            drainAndVerify(second.Get());
+        }
+    }
+
+ Y_UNIT_TEST(TestPullListUseWorkerPool) { // pull-list scenario with SetUseWorkerPool(true)
+ TestPullListImpl(true);
+ }
+
+ Y_UNIT_TEST(TestPullListNoWorkerPool) { // same scenario with SetUseWorkerPool(false)
+ TestPullListImpl(false);
+ }
+
+    // Compiles `sql` as a push-stream program, feeds it "0", "1", "2" and checks what reaches the consumer.
+    void TestPushStreamImpl(bool useWorkerPool) {
+        auto options = TProgramFactoryOptions().SetUseWorkerPool(useWorkerPool);
+        auto factory = MakeProgramFactory(options);
+        auto program = factory->MakePushStreamProgram(
+            TProtobufInputSpec<NPureCalcProto::TStringMessage>(),
+            TProtobufOutputSpec<NPureCalcProto::TStringMessage>(),
+            sql,
+            ETranslationMode::SQL
+        );
+
+        auto feedAndVerify = [](IConsumer<NPureCalcProto::TStringMessage*>* sink, const TVector<TString>& collected) {
+            NPureCalcProto::TStringMessage row;
+            for (const char* value: {"0", "1", "2"}) {
+                row.SetX(value);
+                sink->OnObject(&row);
+            }
+            sink->OnFinish();
+            UNIT_ASSERT_VALUES_EQUAL(expected, collected);
+        };
+
+        // One application at a time, twice in a row, each with its own result buffer.
+        for (size_t round = 0; round != 2; ++round) {
+            TVector<TString> rows;
+            auto sink = program->Apply(MakeHolder<TStringMessageConsumerImpl>(&rows));
+            feedAndVerify(sink.Get(), rows);
+        }
+
+        // Two applications alive at once; both are created before either is fed.
+        {
+            TVector<TString> rows1;
+            auto sink1 = program->Apply(MakeHolder<TStringMessageConsumerImpl>(&rows1));
+            TVector<TString> rows2;
+            auto sink2 = program->Apply(MakeHolder<TStringMessageConsumerImpl>(&rows2));
+            feedAndVerify(sink1.Get(), rows1);
+            feedAndVerify(sink2.Get(), rows2);
+        }
+    }
+
+ Y_UNIT_TEST(TestPushStreamUseWorkerPool) { // push-stream scenario with SetUseWorkerPool(true)
+ TestPushStreamImpl(true);
+ }
+
+ Y_UNIT_TEST(TestPushStreamNoWorkerPool) { // same scenario with SetUseWorkerPool(false)
+ TestPushStreamImpl(false);
+ }
+}
diff --git a/ydb/library/yql/public/purecalc/ut/test_schema.cpp b/ydb/library/yql/public/purecalc/ut/test_schema.cpp
new file mode 100644
index 0000000000..9763e52b00
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/test_schema.cpp
@@ -0,0 +1 @@
+#include <library/cpp/testing/unittest/registar.h>
diff --git a/ydb/library/yql/public/purecalc/ut/test_sexpr.cpp b/ydb/library/yql/public/purecalc/ut/test_sexpr.cpp
new file mode 100644
index 0000000000..b9d55c0f98
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/test_sexpr.cpp
@@ -0,0 +1,55 @@
+#include <ydb/library/yql/public/purecalc/purecalc.h>
+
+#include "fake_spec.h"
+
+#include <ydb/library/yql/public/purecalc/ut/protos/test_structs.pb.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+Y_UNIT_TEST_SUITE(TestSExpr) {
+    using namespace NYql::NPureCalc;
+
+    // A minimal well-formed s-expression must compile for every program kind.
+    Y_UNIT_TEST(TestSExprCompile) {
+        auto factory = MakeProgramFactory();
+
+        const TString validProgram = R"(
+ (
+ (return (Self '0))
+ )
+ )";
+
+        // Pull stream.
+        UNIT_ASSERT_NO_EXCEPTION(
+            factory->MakePullStreamProgram(FakeIS(), FakeOS(), validProgram, ETranslationMode::SExpr));
+
+        // Pull list.
+        UNIT_ASSERT_NO_EXCEPTION(
+            factory->MakePullListProgram(FakeIS(), FakeOS(), validProgram, ETranslationMode::SExpr));
+
+        // Push stream.
+        UNIT_ASSERT_NO_EXCEPTION(
+            factory->MakePushStreamProgram(FakeIS(), FakeOS(), validProgram, ETranslationMode::SExpr));
+    }
+
+    // Text that is not an s-expression must be rejected with TCompileError by every program kind.
+    Y_UNIT_TEST(TestInvalidSExpr) {
+        auto factory = MakeProgramFactory();
+
+        const TString brokenProgram = R"(
+ Some totally invalid SExpr
+ )";
+
+        UNIT_ASSERT_EXCEPTION_CONTAINS(
+            factory->MakePullStreamProgram(FakeIS(), FakeOS(), brokenProgram, ETranslationMode::SExpr),
+            TCompileError, "failed to parse s-expression");
+
+        UNIT_ASSERT_EXCEPTION_CONTAINS(
+            factory->MakePullListProgram(FakeIS(), FakeOS(), brokenProgram, ETranslationMode::SExpr),
+            TCompileError, "failed to parse s-expression");
+
+        UNIT_ASSERT_EXCEPTION_CONTAINS(
+            factory->MakePushStreamProgram(FakeIS(), FakeOS(), brokenProgram, ETranslationMode::SExpr),
+            TCompileError, "failed to parse s-expression");
+    }
+}
diff --git a/ydb/library/yql/public/purecalc/ut/test_sql.cpp b/ydb/library/yql/public/purecalc/ut/test_sql.cpp
new file mode 100644
index 0000000000..10157912a9
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/test_sql.cpp
@@ -0,0 +1,205 @@
+#include <ydb/library/yql/public/purecalc/purecalc.h>
+
+#include "fake_spec.h"
+
+#include <ydb/library/yql/public/purecalc/ut/protos/test_structs.pb.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+Y_UNIT_TEST_SUITE(TestSql) {
+ using namespace NYql::NPureCalc;
+
+ Y_UNIT_TEST(TestSqlCompile) {
+ auto factory = MakeProgramFactory();
+ // NOTE(review): the 2:13 positions in expectedIssues below presumably refer to this query text, so its whitespace matters - confirm.
+ auto sql = TString(R"(
+ SELECT * FROM Input;
+ )");
+ // Both pull program kinds must compile without throwing.
+ UNIT_ASSERT_NO_EXCEPTION([&](){
+ factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL);
+ }());
+
+ UNIT_ASSERT_NO_EXCEPTION([&](){
+ factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL);
+ }());
+ // The push-stream program is kept so that its issue list can be inspected.
+ auto program = factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL);
+ auto expectedIssues = TString(R"(<main>: Warning: Type annotation, code: 1030
+ generated.sql:2:13: Warning: At function: PersistableRepr
+ generated.sql:2:13: Warning: Persistable required. Atom, key, world, datasink, datasource, callable, resource, stream and lambda are not persistable, code: 1104
+)");
+ // Compilation succeeds, but the issue text must match exactly.
+ UNIT_ASSERT_VALUES_EQUAL(expectedIssues, program->GetIssues().ToString());
+ }
+
+    // A query that reads through TABLES() must compile for all three program kinds.
+    Y_UNIT_TEST(TestSqlCompileSingleUnnamedInput) {
+        auto factory = MakeProgramFactory();
+        const TString query = R"(
+ SELECT * FROM TABLES()
+ )";
+
+        // Pull stream.
+        UNIT_ASSERT_NO_EXCEPTION(
+            factory->MakePullStreamProgram(FakeIS(), FakeOS(), query, ETranslationMode::SQL));
+
+        // Pull list.
+        UNIT_ASSERT_NO_EXCEPTION(
+            factory->MakePullListProgram(FakeIS(), FakeOS(), query, ETranslationMode::SQL));
+
+        // Push stream.
+        UNIT_ASSERT_NO_EXCEPTION(
+            factory->MakePushStreamProgram(FakeIS(), FakeOS(), query, ETranslationMode::SQL));
+    }
+
+    // With FakeIS(2) the query may address the inputs as Input0 and Input1.
+    Y_UNIT_TEST(TestSqlCompileNamedMultiinputs) {
+        auto factory = MakeProgramFactory();
+
+        const TString query = R"(
+ SELECT * FROM Input0
+ UNION ALL
+ SELECT * FROM Input1
+ )";
+
+        UNIT_ASSERT_NO_EXCEPTION(
+            factory->MakePullListProgram(FakeIS(2), FakeOS(), query, ETranslationMode::SQL));
+    }
+
+    // With FakeIS(3) the query may unpack PROCESS TABLES() into three table variables.
+    Y_UNIT_TEST(TestSqlCompileUnnamedMultiinputs) {
+        auto factory = MakeProgramFactory();
+
+        const TString query = R"(
+ $t0, $t1, $t2 = PROCESS TABLES();
+ SELECT * FROM $t0
+ UNION ALL
+ SELECT * FROM $t1
+ UNION ALL
+ SELECT * FROM $t2
+ )";
+
+        UNIT_ASSERT_NO_EXCEPTION(
+            factory->MakePullListProgram(FakeIS(3), FakeOS(), query, ETranslationMode::SQL));
+    }
+
+ Y_UNIT_TEST(TestSqlCompileWithWarning) {
+ auto factory = MakeProgramFactory();
+ // $y is assigned but never read; the first expected warning below is about exactly that.
+ auto sql = TString(R"(
+ $x = 1;
+ $y = 2;
+ SELECT $x as Name FROM Input;
+ )");
+ // NOTE(review): the 3:13 / 4:13 positions presumably refer to the query text above, so its whitespace matters - confirm.
+ auto expectedIssues = TString(R"(generated.sql:3:13: Warning: Symbol $y is not used, code: 4527
+<main>: Warning: Type annotation, code: 1030
+ generated.sql:4:13: Warning: At function: PersistableRepr
+ generated.sql:4:13: Warning: Persistable required. Atom, key, world, datasink, datasource, callable, resource, stream and lambda are not persistable, code: 1104
+)");
+ // Compilation succeeds; the warnings are read back from the program object and compared verbatim.
+ auto program = factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL);
+ UNIT_ASSERT_VALUES_EQUAL(expectedIssues, program->GetIssues().ToString());
+ }
+
+    // Selecting from `WrongTable` must fail with TCompileError mentioning "Failed to optimize" in every program kind.
+    Y_UNIT_TEST(TestSqlWrongTableName) {
+        auto factory = MakeProgramFactory();
+        const TString query = R"(
+ SELECT * FROM WrongTable;
+ )";
+
+        UNIT_ASSERT_EXCEPTION_CONTAINS(
+            factory->MakePullStreamProgram(FakeIS(), FakeOS(), query, ETranslationMode::SQL),
+            TCompileError, "Failed to optimize");
+
+        UNIT_ASSERT_EXCEPTION_CONTAINS(
+            factory->MakePullListProgram(FakeIS(), FakeOS(), query, ETranslationMode::SQL),
+            TCompileError, "Failed to optimize");
+
+        UNIT_ASSERT_EXCEPTION_CONTAINS(
+            factory->MakePushStreamProgram(FakeIS(), FakeOS(), query, ETranslationMode::SQL),
+            TCompileError, "Failed to optimize");
+    }
+
+    // A string concatenation wrapped in EvaluateExpr must not break compilation in any program kind.
+    Y_UNIT_TEST(TestAllocateLargeStringOnEvaluate) {
+        auto factory = MakeProgramFactory();
+        const TString query = R"(
+ $data = Length(EvaluateExpr("long string" || " very loooong string"));
+ SELECT $data as Name FROM Input;
+ )";
+
+        // Pull stream.
+        UNIT_ASSERT_NO_EXCEPTION(
+            factory->MakePullStreamProgram(FakeIS(), FakeOS(), query, ETranslationMode::SQL));
+
+        // Pull list.
+        UNIT_ASSERT_NO_EXCEPTION(
+            factory->MakePullListProgram(FakeIS(), FakeOS(), query, ETranslationMode::SQL));
+
+        // Push stream.
+        UNIT_ASSERT_NO_EXCEPTION(
+            factory->MakePushStreamProgram(FakeIS(), FakeOS(), query, ETranslationMode::SQL));
+    }
+
+    // Text that is not SQL must be rejected with TCompileError mentioning "failed to parse SQL" in every program kind.
+    Y_UNIT_TEST(TestInvalidSql) {
+        auto factory = MakeProgramFactory();
+        const TString query = R"(
+ Just some invalid SQL;
+ )";
+
+        UNIT_ASSERT_EXCEPTION_CONTAINS(
+            factory->MakePullStreamProgram(FakeIS(), FakeOS(), query, ETranslationMode::SQL),
+            TCompileError, "failed to parse SQL");
+
+        UNIT_ASSERT_EXCEPTION_CONTAINS(
+            factory->MakePullListProgram(FakeIS(), FakeOS(), query, ETranslationMode::SQL),
+            TCompileError, "failed to parse SQL");
+
+        UNIT_ASSERT_EXCEPTION_CONTAINS(
+            factory->MakePushStreamProgram(FakeIS(), FakeOS(), query, ETranslationMode::SQL),
+            TCompileError, "failed to parse SQL");
+    }
+
+    // PROCESS with an identity lambda over TableRow() must compile for every program kind.
+    Y_UNIT_TEST(TestUseProcess) {
+        auto factory = MakeProgramFactory();
+        const TString query = R"(
+ $processor = ($row) -> ($row);
+
+ PROCESS Input using $processor(TableRow());
+ )";
+
+        // Pull stream.
+        UNIT_ASSERT_NO_EXCEPTION(
+            factory->MakePullStreamProgram(FakeIS(), FakeOS(), query, ETranslationMode::SQL));
+
+        // Pull list.
+        UNIT_ASSERT_NO_EXCEPTION(
+            factory->MakePullListProgram(FakeIS(), FakeOS(), query, ETranslationMode::SQL));
+
+        // Push stream.
+        UNIT_ASSERT_NO_EXCEPTION(
+            factory->MakePushStreamProgram(FakeIS(), FakeOS(), query, ETranslationMode::SQL));
+    }
+
+    // EvaluateCode(LambdaCode(...)) used inside a PROCESS lambda must compile as a pull-list program.
+    Y_UNIT_TEST(TestUseCodegen) {
+        auto factory = MakeProgramFactory();
+
+        const TString query = R"(
+ $processor = ($row) -> {
+ $lambda = EvaluateCode(LambdaCode(($row) -> ($row)));
+ return $lambda($row);
+ };
+
+ PROCESS Input using $processor(TableRow());
+ )";
+
+        UNIT_ASSERT_NO_EXCEPTION(
+            factory->MakePullListProgram(FakeIS(), FakeOS(), query, ETranslationMode::SQL));
+    }
+}
diff --git a/ydb/library/yql/public/purecalc/ut/test_udf.cpp b/ydb/library/yql/public/purecalc/ut/test_udf.cpp
new file mode 100644
index 0000000000..a42326d521
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/test_udf.cpp
@@ -0,0 +1,195 @@
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <ydb/library/yql/public/purecalc/purecalc.h>
+#include <ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h>
+#include <ydb/library/yql/public/purecalc/ut/protos/test_structs.pb.h>
+#include <ydb/library/yql/public/udf/udf_counter.h>
+#include <ydb/library/yql/public/udf/udf_type_builder.h>
+#include <library/cpp/testing/unittest/registar.h>
+
+class TMyModule : public NKikimr::NUdf::IUdfModule { // test UDF module exposing a single function "Func" with signature i32(i32)
+public:
+ class TFunc : public NKikimr::NUdf::TBoxedValue {
+ public:
+ TFunc(NKikimr::NUdf::TCounter counter, NKikimr::NUdf::TScopedProbe scopedProbe)
+ : Counter_(counter)
+ , ScopedProbe_(scopedProbe)
+ {}
+ // Inside with_lock(ScopedProbe_): increments the counter and returns the first argument as i32.
+ NKikimr::NUdf::TUnboxedValue Run(const NKikimr::NUdf::IValueBuilder* valueBuilder, const NKikimr::NUdf::TUnboxedValuePod* args) const override {
+ Y_UNUSED(valueBuilder);
+ with_lock(ScopedProbe_) {
+ Counter_.Inc();
+ return NKikimr::NUdf::TUnboxedValuePod(args[0].Get<i32>());
+ }
+ }
+ // mutable: both members are used from the const Run() above.
+ private:
+ mutable NKikimr::NUdf::TCounter Counter_;
+ mutable NKikimr::NUdf::TScopedProbe ScopedProbe_;
+ };
+ // Reports no functions to the sink.
+ void GetAllFunctions(NKikimr::NUdf::IFunctionsSink& sink) const override {
+ Y_UNUSED(sink);
+ }
+ // Only the name "Func" is recognised; for any other name the builder is left untouched.
+ void BuildFunctionTypeInfo(
+ const NKikimr::NUdf::TStringRef& name,
+ NKikimr::NUdf::TType* userType,
+ const NKikimr::NUdf::TStringRef& typeConfig,
+ ui32 flags,
+ NKikimr::NUdf::IFunctionTypeInfoBuilder& builder) const override {
+ Y_UNUSED(userType);
+ Y_UNUSED(typeConfig);
+ Y_UNUSED(flags);
+ if (name == NKikimr::NUdf::TStringRef::Of("Func")) {
+ builder.SimpleSignature<i32(i32)>();
+ builder.Implementation(new TFunc(
+ builder.GetCounter("FuncCalls",true),
+ builder.GetScopedProbe("FuncTime")
+ ));
+ }
+ }
+ // Nothing to clean up.
+ void CleanupOnTerminate() const override {
+ }
+};
+
+// Provider used by the tests: hands out a counter backed by *calls and a probe that writes "Enter"/"Exit" into *log.
+class TMyCountersProvider : public NKikimr::NUdf::ICountersProvider, public NKikimr::NUdf::IScopedProbeHost {
+public:
+    TMyCountersProvider(i64* calls, TString* log)
+        : Calls_(calls), Log_(log)
+    {}
+
+    NKikimr::NUdf::TCounter GetCounter(const NKikimr::NUdf::TStringRef& module, const NKikimr::NUdf::TStringRef& name, bool deriv) override {
+        UNIT_ASSERT_VALUES_EQUAL(module, "MyModule");
+        UNIT_ASSERT_VALUES_EQUAL(name, "FuncCalls");
+        UNIT_ASSERT_VALUES_EQUAL(deriv, true);
+        return NKikimr::NUdf::TCounter(Calls_);
+    }
+
+    NKikimr::NUdf::TScopedProbe GetScopedProbe(const NKikimr::NUdf::TStringRef& module, const NKikimr::NUdf::TStringRef& name) override {
+        UNIT_ASSERT_VALUES_EQUAL(module, "MyModule");
+        UNIT_ASSERT_VALUES_EQUAL(name, "FuncTime");
+        auto* host = Log_ ? this : nullptr;
+        return NKikimr::NUdf::TScopedProbe(host, Log_);
+    }
+
+    void Acquire(void* cookie) override {
+        UNIT_ASSERT(Log_ == cookie);
+        Log_->append("Enter\n");
+    }
+
+    void Release(void* cookie) override {
+        UNIT_ASSERT(Log_ == cookie);
+        Log_->append("Exit\n");
+    }
+private:
+    i64* Calls_;
+    TString* Log_;
+};
+
+namespace NPureCalcProto { // forward declarations only; the full definitions come from test_structs.pb.h included at the top of this file
+ class TUnparsed;
+ class TParsed;
+}
+
+// Single-shot input stream: yields one TUnparsed message with S = "foo", then nullptr on every later call.
+class TDocInput : public NYql::NPureCalc::IStream<NPureCalcProto::TUnparsed*> {
+public:
+    NPureCalcProto::TUnparsed* Fetch() override {
+        if (!Extracted) {
+            Extracted = true;
+            Msg.SetS("foo");
+            return &Msg;
+        }
+        return nullptr;
+    }
+
+public:
+    NPureCalcProto::TUnparsed Msg;
+    bool Extracted = false;
+};
+
+Y_UNIT_TEST_SUITE(TestUdf) {
+    // One input row means one call of MyModule::Func, so the provider-backed counter must read 1.
+    Y_UNIT_TEST(TestCounters) {
+        using namespace NYql::NPureCalc;
+
+        auto factory = MakeProgramFactory();
+        i64 udfCalls = 0;
+        TMyCountersProvider counters(&udfCalls, nullptr);
+        factory->AddUdfModule("MyModule", new TMyModule);
+        factory->SetCountersProvider(&counters);
+
+        auto program = factory->MakePullStreamProgram(
+            TProtobufInputSpec<NPureCalcProto::TUnparsed>(),
+            TProtobufOutputSpec<NPureCalcProto::TParsed>(),
+            "select MyModule::Func(1) as A, 2 as B, 3 as C from Input",
+            ETranslationMode::SQL);
+
+        auto rows = program->Apply(MakeHolder<TDocInput>());
+        auto* row = rows->Fetch();
+        UNIT_ASSERT(row);
+        UNIT_ASSERT_VALUES_EQUAL(row->GetA(), 1);
+        UNIT_ASSERT_VALUES_EQUAL(row->GetB(), 2);
+        UNIT_ASSERT_VALUES_EQUAL(row->GetC(), 3);
+        UNIT_ASSERT_VALUES_EQUAL(udfCalls, 1);
+        UNIT_ASSERT(!rows->Fetch());
+    }
+
+    // With the output filter limited to B and C, A stays at its default 0 and the UDF counter stays at 0.
+    Y_UNIT_TEST(TestCountersFilteredColumns) {
+        using namespace NYql::NPureCalc;
+
+        auto factory = MakeProgramFactory();
+        i64 udfCalls = 0;
+        TMyCountersProvider counters(&udfCalls, nullptr);
+        factory->AddUdfModule("MyModule", new TMyModule);
+        factory->SetCountersProvider(&counters);
+
+        auto outputSpec = TProtobufOutputSpec<NPureCalcProto::TParsed>();
+        outputSpec.SetOutputColumnsFilter(THashSet<TString>({"B", "C"}));
+        auto program = factory->MakePullStreamProgram(
+            TProtobufInputSpec<NPureCalcProto::TUnparsed>(),
+            outputSpec,
+            "select MyModule::Func(1) as A, 2 as B, 3 as C from Input",
+            ETranslationMode::SQL);
+
+        auto rows = program->Apply(MakeHolder<TDocInput>());
+        auto* row = rows->Fetch();
+        UNIT_ASSERT(row);
+        UNIT_ASSERT_VALUES_EQUAL(row->GetA(), 0);
+        UNIT_ASSERT_VALUES_EQUAL(row->GetB(), 2);
+        UNIT_ASSERT_VALUES_EQUAL(row->GetC(), 3);
+        UNIT_ASSERT_VALUES_EQUAL(udfCalls, 0);
+        UNIT_ASSERT(!rows->Fetch());
+    }
+
+    // One call of MyModule::Func must leave exactly one "Enter"/"Exit" pair in the probe log.
+    Y_UNIT_TEST(TestScopedProbes) {
+        using namespace NYql::NPureCalc;
+
+        auto factory = MakeProgramFactory();
+        TString probeLog;
+        TMyCountersProvider counters(nullptr, &probeLog);
+        factory->AddUdfModule("MyModule", new TMyModule);
+        factory->SetCountersProvider(&counters);
+
+        auto program = factory->MakePullStreamProgram(
+            TProtobufInputSpec<NPureCalcProto::TUnparsed>(),
+            TProtobufOutputSpec<NPureCalcProto::TParsed>(),
+            "select MyModule::Func(1) as A, 2 as B, 3 as C from Input",
+            ETranslationMode::SQL);
+
+        auto rows = program->Apply(MakeHolder<TDocInput>());
+        auto* row = rows->Fetch();
+        UNIT_ASSERT(row);
+        UNIT_ASSERT_VALUES_EQUAL(row->GetA(), 1);
+        UNIT_ASSERT_VALUES_EQUAL(row->GetB(), 2);
+        UNIT_ASSERT_VALUES_EQUAL(row->GetC(), 3);
+        UNIT_ASSERT_VALUES_EQUAL(probeLog, "Enter\nExit\n");
+        UNIT_ASSERT(!rows->Fetch());
+    }
+}
diff --git a/ydb/library/yql/public/purecalc/ut/test_user_data.cpp b/ydb/library/yql/public/purecalc/ut/test_user_data.cpp
new file mode 100644
index 0000000000..3d0a0935ef
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/test_user_data.cpp
@@ -0,0 +1,62 @@
+#include <ydb/library/yql/public/purecalc/purecalc.h>
+#include <ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h>
+#include <ydb/library/yql/public/purecalc/ut/protos/test_structs.pb.h>
+#include <ydb/library/yql/public/purecalc/ut/empty_stream.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+Y_UNIT_TEST_SUITE(TestUserData) {
+    // An inline file registered in the factory options must be readable from SQL through FileContent().
+    Y_UNIT_TEST(TestUserData) {
+        using namespace NYql::NPureCalc;
+        auto options = TProgramFactoryOptions()
+            .AddFile(NYql::NUserData::EDisposition::INLINE, "my_file.txt", "my content!");
+
+        auto factory = MakeProgramFactory(options);
+
+        auto program = factory->MakePullListProgram(
+            TProtobufInputSpec<NPureCalcProto::TStringMessage>(),
+            TProtobufOutputSpec<NPureCalcProto::TStringMessage>(),
+            "SELECT UNWRAP(CAST(FileContent(\"my_file.txt\") AS Utf8)) AS X",
+            ETranslationMode::SQL
+        );
+
+        auto stream = program->Apply(EmptyStream<NPureCalcProto::TStringMessage*>());
+
+        // Fetch into an initialised local instead of assigning inside the assertion macro.
+        auto* message = stream->Fetch();
+        UNIT_ASSERT(message);
+        UNIT_ASSERT_VALUES_EQUAL(message->GetX(), "my content!");
+        UNIT_ASSERT(!stream->Fetch());
+    }
+
+    // Inline libraries can import from each other: b.sql imports $x from a.sql, the query imports $y from b.sql.
+    Y_UNIT_TEST(TestUserDataLibrary) {
+        using namespace NYql::NPureCalc;
+        try {
+            auto options = TProgramFactoryOptions()
+                .AddLibrary(NYql::NUserData::EDisposition::INLINE, "a.sql", "$x = 1; EXPORT $x;")
+                .AddLibrary(NYql::NUserData::EDisposition::INLINE, "b.sql", "IMPORT a SYMBOLS $x; $y = CAST($x + 1 AS String); EXPORT $y;");
+
+            auto factory = MakeProgramFactory(options);
+
+            auto program = factory->MakePullListProgram(
+                TProtobufInputSpec<NPureCalcProto::TStringMessage>(),
+                TProtobufOutputSpec<NPureCalcProto::TStringMessage>(),
+                "IMPORT b SYMBOLS $y; SELECT CAST($y AS Utf8) ?? '' AS X;",
+                ETranslationMode::SQL
+            );
+
+            auto stream = program->Apply(EmptyStream<NPureCalcProto::TStringMessage*>());
+
+            auto* message = stream->Fetch();
+            UNIT_ASSERT(message);
+            UNIT_ASSERT_VALUES_EQUAL(message->GetX(), "2");
+            UNIT_ASSERT(!stream->Fetch());
+        } catch (const TCompileError& e) {
+            // Print the diagnostics, then rethrow the original exception object (`throw e;` would copy and could slice it).
+            Cerr << e;
+            throw;
+        }
+    }
+}
diff --git a/ydb/library/yql/public/purecalc/ut/ya.make b/ydb/library/yql/public/purecalc/ut/ya.make
new file mode 100644
index 0000000000..5b613a4669
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ut/ya.make
@@ -0,0 +1,26 @@
+UNITTEST()
+# Unit tests for the purecalc public library (see PEERDIR below).
+SRCS(
+ empty_stream.h
+ fake_spec.cpp
+ fake_spec.h
+ test_schema.cpp
+ test_sexpr.cpp
+ test_sql.cpp
+ test_udf.cpp
+ test_user_data.cpp
+ test_eval.cpp
+ test_pool.cpp
+)
+# Library under test, its protobuf IO specs and the test-only proto messages.
+PEERDIR(
+ ydb/library/yql/public/purecalc
+ ydb/library/yql/public/purecalc/io_specs/protobuf
+ ydb/library/yql/public/purecalc/ut/protos
+)
+# NOTE(review): SIZE(MEDIUM) presumably raises the test size class above the default - confirm in the ya.make documentation.
+SIZE(MEDIUM)
+
+YQL_LAST_ABI_VERSION()
+
+END()
diff --git a/ydb/library/yql/public/purecalc/ya.make b/ydb/library/yql/public/purecalc/ya.make
new file mode 100644
index 0000000000..3ac6ab6b59
--- /dev/null
+++ b/ydb/library/yql/public/purecalc/ya.make
@@ -0,0 +1,19 @@
+LIBRARY()
+# Compiles purecalc.cpp and depends on purecalc/common plus the UDF exception_policy service.
+SRCS(
+ purecalc.cpp
+)
+
+PEERDIR(
+ ydb/library/yql/public/udf/service/exception_policy
+ ydb/library/yql/public/purecalc/common
+)
+
+YQL_LAST_ABI_VERSION()
+
+END()
+# Test directories that belong to this library.
+RECURSE_FOR_TESTS(
+ io_specs/ut
+ ut
+)
diff --git a/ydb/library/yql/public/ya.make b/ydb/library/yql/public/ya.make
index 37dcc6368a..9fbf7e6a27 100644
--- a/ydb/library/yql/public/ya.make
+++ b/ydb/library/yql/public/ya.make
@@ -2,6 +2,10 @@ RECURSE(
decimal
fastcheck
issue
+ purecalc
+ purecalc/examples
+ purecalc/helpers
+ purecalc/io_specs
types
udf
udf/arrow
diff --git a/yql/CMakeLists.txt b/yql/CMakeLists.txt
new file mode 100644
index 0000000000..bd95d3af2b
--- /dev/null
+++ b/yql/CMakeLists.txt
@@ -0,0 +1,9 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+# The only subdirectory built from yql/ is udfs.
+add_subdirectory(udfs)
diff --git a/yql/udfs/CMakeLists.txt b/yql/udfs/CMakeLists.txt
new file mode 100644
index 0000000000..867161a12c
--- /dev/null
+++ b/yql/udfs/CMakeLists.txt
@@ -0,0 +1,9 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+# The only subdirectory built from yql/udfs/ is common.
+add_subdirectory(common)
diff --git a/yql/udfs/common/CMakeLists.txt b/yql/udfs/common/CMakeLists.txt
new file mode 100644
index 0000000000..46c961352b
--- /dev/null
+++ b/yql/udfs/common/CMakeLists.txt
@@ -0,0 +1,10 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+# Common UDFs built from this directory: ip and url.
+add_subdirectory(ip)
+add_subdirectory(url)
diff --git a/yql/udfs/common/ip/CMakeLists.darwin-x86_64.txt b/yql/udfs/common/ip/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..8c6217f557
--- /dev/null
+++ b/yql/udfs/common/ip/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,36 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# ip_udf is an INTERFACE target: it carries link dependencies only and compiles no sources itself.
+add_library(ip_udf INTERFACE)
+target_link_libraries(ip_udf INTERFACE
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ library-cpp-ipreg
+ common-ip_base-lib
+)
+# ip_udf.global compiles ip_udf.cpp with UDF ABI 2.28.0 defines. NOTE(review): add_global_library_for is defined elsewhere; presumably it ties ip_udf.global to ip_udf - confirm.
+add_global_library_for(ip_udf.global ip_udf)
+target_compile_options(ip_udf.global PRIVATE
+ -DUDF_ABI_VERSION_MAJOR=2
+ -DUDF_ABI_VERSION_MINOR=28
+ -DUDF_ABI_VERSION_PATCH=0
+)
+target_link_libraries(ip_udf.global PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ library-cpp-ipreg
+ common-ip_base-lib
+)
+target_sources(ip_udf.global PRIVATE
+ ${CMAKE_SOURCE_DIR}/yql/udfs/common/ip/ip_udf.cpp
+)
diff --git a/yql/udfs/common/ip/CMakeLists.linux-aarch64.txt b/yql/udfs/common/ip/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..9e54b0014d
--- /dev/null
+++ b/yql/udfs/common/ip/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,38 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# ip_udf is an INTERFACE target: it carries link dependencies only and compiles no sources itself.
+add_library(ip_udf INTERFACE)
+target_link_libraries(ip_udf INTERFACE
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ library-cpp-ipreg
+ common-ip_base-lib
+)
+# ip_udf.global compiles ip_udf.cpp with UDF ABI 2.28.0 defines. NOTE(review): add_global_library_for is defined elsewhere; presumably it ties ip_udf.global to ip_udf - confirm.
+add_global_library_for(ip_udf.global ip_udf)
+target_compile_options(ip_udf.global PRIVATE
+ -DUDF_ABI_VERSION_MAJOR=2
+ -DUDF_ABI_VERSION_MINOR=28
+ -DUDF_ABI_VERSION_PATCH=0
+)
+target_link_libraries(ip_udf.global PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ library-cpp-ipreg
+ common-ip_base-lib
+)
+target_sources(ip_udf.global PRIVATE
+ ${CMAKE_SOURCE_DIR}/yql/udfs/common/ip/ip_udf.cpp
+)
diff --git a/yql/udfs/common/ip/CMakeLists.linux-x86_64.txt b/yql/udfs/common/ip/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..9e54b0014d
--- /dev/null
+++ b/yql/udfs/common/ip/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,38 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(ip_udf INTERFACE)
+target_link_libraries(ip_udf INTERFACE
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ library-cpp-ipreg
+ common-ip_base-lib
+)
+
+add_global_library_for(ip_udf.global ip_udf)
+target_compile_options(ip_udf.global PRIVATE
+ -DUDF_ABI_VERSION_MAJOR=2
+ -DUDF_ABI_VERSION_MINOR=28
+ -DUDF_ABI_VERSION_PATCH=0
+)
+target_link_libraries(ip_udf.global PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ library-cpp-ipreg
+ common-ip_base-lib
+)
+target_sources(ip_udf.global PRIVATE
+ ${CMAKE_SOURCE_DIR}/yql/udfs/common/ip/ip_udf.cpp
+)
diff --git a/yql/udfs/common/ip/CMakeLists.txt b/yql/udfs/common/ip/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/yql/udfs/common/ip/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/yql/udfs/common/ip/CMakeLists.windows-x86_64.txt b/yql/udfs/common/ip/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..8c6217f557
--- /dev/null
+++ b/yql/udfs/common/ip/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,36 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(ip_udf INTERFACE)
+target_link_libraries(ip_udf INTERFACE
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ library-cpp-ipreg
+ common-ip_base-lib
+)
+
+add_global_library_for(ip_udf.global ip_udf)
+target_compile_options(ip_udf.global PRIVATE
+ -DUDF_ABI_VERSION_MAJOR=2
+ -DUDF_ABI_VERSION_MINOR=28
+ -DUDF_ABI_VERSION_PATCH=0
+)
+target_link_libraries(ip_udf.global PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ library-cpp-ipreg
+ common-ip_base-lib
+)
+target_sources(ip_udf.global PRIVATE
+ ${CMAKE_SOURCE_DIR}/yql/udfs/common/ip/ip_udf.cpp
+)
diff --git a/yql/udfs/common/url/CMakeLists.darwin-x86_64.txt b/yql/udfs/common/url/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..5b6b766df7
--- /dev/null
+++ b/yql/udfs/common/url/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,42 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(url_udf INTERFACE)
+target_link_libraries(url_udf INTERFACE
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ kernel-hosts-owner
+ kernel-urlnorm
+ library-cpp-robots_txt
+ common-url_base-lib
+ yweb-robot-dbscheeme
+)
+
+add_global_library_for(url_udf.global url_udf)
+target_compile_options(url_udf.global PRIVATE
+ -DUDF_ABI_VERSION_MAJOR=2
+ -DUDF_ABI_VERSION_MINOR=33
+ -DUDF_ABI_VERSION_PATCH=0
+)
+target_link_libraries(url_udf.global PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ kernel-hosts-owner
+ kernel-urlnorm
+ library-cpp-robots_txt
+ common-url_base-lib
+ yweb-robot-dbscheeme
+)
+target_sources(url_udf.global PRIVATE
+ ${CMAKE_SOURCE_DIR}/yql/udfs/common/url/url_udf.cpp
+)
diff --git a/yql/udfs/common/url/CMakeLists.linux-aarch64.txt b/yql/udfs/common/url/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..467982bd9d
--- /dev/null
+++ b/yql/udfs/common/url/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,44 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(url_udf INTERFACE)
+target_link_libraries(url_udf INTERFACE
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ kernel-hosts-owner
+ kernel-urlnorm
+ library-cpp-robots_txt
+ common-url_base-lib
+ yweb-robot-dbscheeme
+)
+
+add_global_library_for(url_udf.global url_udf)
+target_compile_options(url_udf.global PRIVATE
+ -DUDF_ABI_VERSION_MAJOR=2
+ -DUDF_ABI_VERSION_MINOR=33
+ -DUDF_ABI_VERSION_PATCH=0
+)
+target_link_libraries(url_udf.global PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ kernel-hosts-owner
+ kernel-urlnorm
+ library-cpp-robots_txt
+ common-url_base-lib
+ yweb-robot-dbscheeme
+)
+target_sources(url_udf.global PRIVATE
+ ${CMAKE_SOURCE_DIR}/yql/udfs/common/url/url_udf.cpp
+)
diff --git a/yql/udfs/common/url/CMakeLists.linux-x86_64.txt b/yql/udfs/common/url/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..467982bd9d
--- /dev/null
+++ b/yql/udfs/common/url/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,44 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(url_udf INTERFACE)
+target_link_libraries(url_udf INTERFACE
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ kernel-hosts-owner
+ kernel-urlnorm
+ library-cpp-robots_txt
+ common-url_base-lib
+ yweb-robot-dbscheeme
+)
+
+add_global_library_for(url_udf.global url_udf)
+target_compile_options(url_udf.global PRIVATE
+ -DUDF_ABI_VERSION_MAJOR=2
+ -DUDF_ABI_VERSION_MINOR=33
+ -DUDF_ABI_VERSION_PATCH=0
+)
+target_link_libraries(url_udf.global PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ kernel-hosts-owner
+ kernel-urlnorm
+ library-cpp-robots_txt
+ common-url_base-lib
+ yweb-robot-dbscheeme
+)
+target_sources(url_udf.global PRIVATE
+ ${CMAKE_SOURCE_DIR}/yql/udfs/common/url/url_udf.cpp
+)
diff --git a/yql/udfs/common/url/CMakeLists.txt b/yql/udfs/common/url/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/yql/udfs/common/url/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/yql/udfs/common/url/CMakeLists.windows-x86_64.txt b/yql/udfs/common/url/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..5b6b766df7
--- /dev/null
+++ b/yql/udfs/common/url/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,42 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(url_udf INTERFACE)
+target_link_libraries(url_udf INTERFACE
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ kernel-hosts-owner
+ kernel-urlnorm
+ library-cpp-robots_txt
+ common-url_base-lib
+ yweb-robot-dbscheeme
+)
+
+add_global_library_for(url_udf.global url_udf)
+target_compile_options(url_udf.global PRIVATE
+ -DUDF_ABI_VERSION_MAJOR=2
+ -DUDF_ABI_VERSION_MINOR=33
+ -DUDF_ABI_VERSION_PATCH=0
+)
+target_link_libraries(url_udf.global PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ yql-public-udf
+ public-udf-support
+ kernel-hosts-owner
+ kernel-urlnorm
+ library-cpp-robots_txt
+ common-url_base-lib
+ yweb-robot-dbscheeme
+)
+target_sources(url_udf.global PRIVATE
+ ${CMAKE_SOURCE_DIR}/yql/udfs/common/url/url_udf.cpp
+)
diff --git a/yweb/CMakeLists.txt b/yweb/CMakeLists.txt
new file mode 100644
index 0000000000..d4925a6659
--- /dev/null
+++ b/yweb/CMakeLists.txt
@@ -0,0 +1,13 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(config)
+add_subdirectory(protos)
+add_subdirectory(realtime)
+add_subdirectory(robot)
+add_subdirectory(urlfilter)
diff --git a/yweb/config/CMakeLists.darwin-x86_64.txt b/yweb/config/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..f7c5bf51d3
--- /dev/null
+++ b/yweb/config/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,44 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_enum_parser_bin
+ TOOL_enum_parser_dependency
+ tools/enum_parser/enum_parser
+ enum_parser
+)
+
+add_library(yweb-config)
+target_link_libraries(yweb-config PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ kernel-langregion
+ kernel-multilanguage_hosts
+ library-cpp-charset
+ cpp-deprecated-fgood
+ cpp-string_utils-url
+ library-cpp-yconf
+ yweb-protos-robotzones
+ tools-enum_parser-enum_serialization_runtime
+)
+target_sources(yweb-config PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/config/environment.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/hostconfig.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/clustercfg.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/robot_zone.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/search_zone.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/langregion.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/logreader_config.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/domainlevel.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/upload_rule.cpp
+)
+generate_enum_serilization(yweb-config
+ ${CMAKE_SOURCE_DIR}/yweb/config/environment.h
+ INCLUDE_HEADERS
+ yweb/config/environment.h
+)
diff --git a/yweb/config/CMakeLists.linux-aarch64.txt b/yweb/config/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..d3ee8e1783
--- /dev/null
+++ b/yweb/config/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,45 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_enum_parser_bin
+ TOOL_enum_parser_dependency
+ tools/enum_parser/enum_parser
+ enum_parser
+)
+
+add_library(yweb-config)
+target_link_libraries(yweb-config PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ kernel-langregion
+ kernel-multilanguage_hosts
+ library-cpp-charset
+ cpp-deprecated-fgood
+ cpp-string_utils-url
+ library-cpp-yconf
+ yweb-protos-robotzones
+ tools-enum_parser-enum_serialization_runtime
+)
+target_sources(yweb-config PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/config/environment.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/hostconfig.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/clustercfg.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/robot_zone.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/search_zone.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/langregion.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/logreader_config.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/domainlevel.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/upload_rule.cpp
+)
+generate_enum_serilization(yweb-config
+ ${CMAKE_SOURCE_DIR}/yweb/config/environment.h
+ INCLUDE_HEADERS
+ yweb/config/environment.h
+)
diff --git a/yweb/config/CMakeLists.linux-x86_64.txt b/yweb/config/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..d3ee8e1783
--- /dev/null
+++ b/yweb/config/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,45 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_enum_parser_bin
+ TOOL_enum_parser_dependency
+ tools/enum_parser/enum_parser
+ enum_parser
+)
+
+add_library(yweb-config)
+target_link_libraries(yweb-config PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ kernel-langregion
+ kernel-multilanguage_hosts
+ library-cpp-charset
+ cpp-deprecated-fgood
+ cpp-string_utils-url
+ library-cpp-yconf
+ yweb-protos-robotzones
+ tools-enum_parser-enum_serialization_runtime
+)
+target_sources(yweb-config PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/config/environment.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/hostconfig.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/clustercfg.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/robot_zone.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/search_zone.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/langregion.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/logreader_config.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/domainlevel.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/upload_rule.cpp
+)
+generate_enum_serilization(yweb-config
+ ${CMAKE_SOURCE_DIR}/yweb/config/environment.h
+ INCLUDE_HEADERS
+ yweb/config/environment.h
+)
diff --git a/yweb/config/CMakeLists.txt b/yweb/config/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/yweb/config/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/yweb/config/CMakeLists.windows-x86_64.txt b/yweb/config/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..f7c5bf51d3
--- /dev/null
+++ b/yweb/config/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,44 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_enum_parser_bin
+ TOOL_enum_parser_dependency
+ tools/enum_parser/enum_parser
+ enum_parser
+)
+
+add_library(yweb-config)
+target_link_libraries(yweb-config PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ kernel-langregion
+ kernel-multilanguage_hosts
+ library-cpp-charset
+ cpp-deprecated-fgood
+ cpp-string_utils-url
+ library-cpp-yconf
+ yweb-protos-robotzones
+ tools-enum_parser-enum_serialization_runtime
+)
+target_sources(yweb-config PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/config/environment.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/hostconfig.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/clustercfg.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/robot_zone.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/search_zone.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/langregion.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/logreader_config.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/domainlevel.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/config/upload_rule.cpp
+)
+generate_enum_serilization(yweb-config
+ ${CMAKE_SOURCE_DIR}/yweb/config/environment.h
+ INCLUDE_HEADERS
+ yweb/config/environment.h
+)
diff --git a/yweb/protos/CMakeLists.darwin-x86_64.txt b/yweb/protos/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..b50e5a988f
--- /dev/null
+++ b/yweb/protos/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,387 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(robotzones)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(yweb-protos)
+target_link_libraries(yweb-protos PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ kernel-mango-proto
+ kernel-search_zone-protos
+ yweb-realtime-protos
+ lib-indexannportion-input
+ zora-proto-common
+ contrib-libs-protobuf
+)
+target_proto_messages(yweb-protos PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/protos/anti_export.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/aura.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/docfactors.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/export.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/geo.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/hostfactors.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/indexdata.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/indexeddoc.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/links.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/maskswithint.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/metric.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/metaquery_lang.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/navsource.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/navsource_strict.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/orange.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/oxygen.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/ownertreeweight.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/ownerbanmasks.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/robot.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/spamchecker.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/spider.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/scdata.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/mragent.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/sr.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/roboturlatrs.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/regwordhost.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/owneraspamdeddata.proto
+)
+target_proto_addincls(yweb-protos
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(yweb-protos
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/yweb/protos/CMakeLists.linux-aarch64.txt b/yweb/protos/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..ebf3d0a113
--- /dev/null
+++ b/yweb/protos/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,388 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(robotzones)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(yweb-protos)
+target_link_libraries(yweb-protos PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ kernel-mango-proto
+ kernel-search_zone-protos
+ yweb-realtime-protos
+ lib-indexannportion-input
+ zora-proto-common
+ contrib-libs-protobuf
+)
+target_proto_messages(yweb-protos PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/protos/anti_export.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/aura.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/docfactors.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/export.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/geo.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/hostfactors.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/indexdata.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/indexeddoc.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/links.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/maskswithint.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/metric.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/metaquery_lang.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/navsource.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/navsource_strict.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/orange.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/oxygen.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/ownertreeweight.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/ownerbanmasks.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/robot.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/spamchecker.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/spider.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/scdata.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/mragent.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/sr.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/roboturlatrs.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/regwordhost.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/owneraspamdeddata.proto
+)
+target_proto_addincls(yweb-protos
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(yweb-protos
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/yweb/protos/CMakeLists.linux-x86_64.txt b/yweb/protos/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..ebf3d0a113
--- /dev/null
+++ b/yweb/protos/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,388 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(robotzones)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(yweb-protos)
+target_link_libraries(yweb-protos PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ kernel-mango-proto
+ kernel-search_zone-protos
+ yweb-realtime-protos
+ lib-indexannportion-input
+ zora-proto-common
+ contrib-libs-protobuf
+)
+target_proto_messages(yweb-protos PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/protos/anti_export.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/aura.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/docfactors.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/export.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/geo.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/hostfactors.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/indexdata.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/indexeddoc.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/links.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/maskswithint.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/metric.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/metaquery_lang.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/navsource.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/navsource_strict.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/orange.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/oxygen.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/ownertreeweight.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/ownerbanmasks.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/robot.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/spamchecker.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/spider.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/scdata.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/mragent.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/sr.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/roboturlatrs.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/regwordhost.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/owneraspamdeddata.proto
+)
+target_proto_addincls(yweb-protos
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(yweb-protos
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/yweb/protos/CMakeLists.txt b/yweb/protos/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/yweb/protos/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/yweb/protos/CMakeLists.windows-x86_64.txt b/yweb/protos/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..b50e5a988f
--- /dev/null
+++ b/yweb/protos/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,387 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(robotzones)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(yweb-protos)
+target_link_libraries(yweb-protos PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ kernel-mango-proto
+ kernel-search_zone-protos
+ yweb-realtime-protos
+ lib-indexannportion-input
+ zora-proto-common
+ contrib-libs-protobuf
+)
+target_proto_messages(yweb-protos PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/protos/anti_export.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/aura.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/docfactors.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/export.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/geo.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/hostfactors.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/indexdata.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/indexeddoc.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/links.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/maskswithint.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/metric.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/metaquery_lang.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/navsource.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/navsource_strict.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/orange.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/oxygen.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/ownertreeweight.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/ownerbanmasks.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/robot.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/spamchecker.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/spider.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/scdata.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/mragent.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/sr.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/roboturlatrs.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/regwordhost.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/owneraspamdeddata.proto
+)
+target_proto_addincls(yweb-protos
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(yweb-protos
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/yweb/protos/robotzones/CMakeLists.darwin-x86_64.txt b/yweb/protos/robotzones/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..e2c5740248
--- /dev/null
+++ b/yweb/protos/robotzones/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,56 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(yweb-protos-robotzones)
+target_link_libraries(yweb-protos-robotzones PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(yweb-protos-robotzones PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/protos/robotzones/robotzone.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/robotzones/uploadrules.proto
+)
+target_proto_addincls(yweb-protos-robotzones
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(yweb-protos-robotzones
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/yweb/protos/robotzones/CMakeLists.linux-aarch64.txt b/yweb/protos/robotzones/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..0b52e1fdb2
--- /dev/null
+++ b/yweb/protos/robotzones/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,57 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(yweb-protos-robotzones)
+target_link_libraries(yweb-protos-robotzones PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(yweb-protos-robotzones PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/protos/robotzones/robotzone.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/robotzones/uploadrules.proto
+)
+target_proto_addincls(yweb-protos-robotzones
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(yweb-protos-robotzones
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/yweb/protos/robotzones/CMakeLists.linux-x86_64.txt b/yweb/protos/robotzones/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..0b52e1fdb2
--- /dev/null
+++ b/yweb/protos/robotzones/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,57 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(yweb-protos-robotzones)
+target_link_libraries(yweb-protos-robotzones PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(yweb-protos-robotzones PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/protos/robotzones/robotzone.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/robotzones/uploadrules.proto
+)
+target_proto_addincls(yweb-protos-robotzones
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(yweb-protos-robotzones
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/yweb/protos/robotzones/CMakeLists.txt b/yweb/protos/robotzones/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/yweb/protos/robotzones/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/yweb/protos/robotzones/CMakeLists.windows-x86_64.txt b/yweb/protos/robotzones/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..e2c5740248
--- /dev/null
+++ b/yweb/protos/robotzones/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,56 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(yweb-protos-robotzones)
+target_link_libraries(yweb-protos-robotzones PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(yweb-protos-robotzones PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/protos/robotzones/robotzone.proto
+ ${CMAKE_SOURCE_DIR}/yweb/protos/robotzones/uploadrules.proto
+)
+target_proto_addincls(yweb-protos-robotzones
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(yweb-protos-robotzones
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/yweb/realtime/CMakeLists.txt b/yweb/realtime/CMakeLists.txt
new file mode 100644
index 0000000000..6d580ae9ad
--- /dev/null
+++ b/yweb/realtime/CMakeLists.txt
@@ -0,0 +1,9 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(protos)
diff --git a/yweb/realtime/protos/CMakeLists.darwin-x86_64.txt b/yweb/realtime/protos/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..6444171984
--- /dev/null
+++ b/yweb/realtime/protos/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,56 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(yweb-realtime-protos)
+target_link_libraries(yweb-realtime-protos PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(yweb-realtime-protos PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/realtime/protos/distributor_dump.proto
+ ${CMAKE_SOURCE_DIR}/yweb/realtime/protos/option.proto
+)
+target_proto_addincls(yweb-realtime-protos
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(yweb-realtime-protos
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/yweb/realtime/protos/CMakeLists.linux-aarch64.txt b/yweb/realtime/protos/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..9a3a36a261
--- /dev/null
+++ b/yweb/realtime/protos/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,57 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(yweb-realtime-protos)
+target_link_libraries(yweb-realtime-protos PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(yweb-realtime-protos PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/realtime/protos/distributor_dump.proto
+ ${CMAKE_SOURCE_DIR}/yweb/realtime/protos/option.proto
+)
+target_proto_addincls(yweb-realtime-protos
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(yweb-realtime-protos
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/yweb/realtime/protos/CMakeLists.linux-x86_64.txt b/yweb/realtime/protos/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..9a3a36a261
--- /dev/null
+++ b/yweb/realtime/protos/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,57 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(yweb-realtime-protos)
+target_link_libraries(yweb-realtime-protos PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(yweb-realtime-protos PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/realtime/protos/distributor_dump.proto
+ ${CMAKE_SOURCE_DIR}/yweb/realtime/protos/option.proto
+)
+target_proto_addincls(yweb-realtime-protos
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(yweb-realtime-protos
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/yweb/realtime/protos/CMakeLists.txt b/yweb/realtime/protos/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/yweb/realtime/protos/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/yweb/realtime/protos/CMakeLists.windows-x86_64.txt b/yweb/realtime/protos/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..6444171984
--- /dev/null
+++ b/yweb/realtime/protos/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,56 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(yweb-realtime-protos)
+target_link_libraries(yweb-realtime-protos PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(yweb-realtime-protos PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/realtime/protos/distributor_dump.proto
+ ${CMAKE_SOURCE_DIR}/yweb/realtime/protos/option.proto
+)
+target_proto_addincls(yweb-realtime-protos
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(yweb-realtime-protos
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/yweb/robot/CMakeLists.txt b/yweb/robot/CMakeLists.txt
new file mode 100644
index 0000000000..9b430d841e
--- /dev/null
+++ b/yweb/robot/CMakeLists.txt
@@ -0,0 +1,10 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(dbscheeme)
+add_subdirectory(kiwi_queries)
diff --git a/yweb/robot/dbscheeme/CMakeLists.darwin-x86_64.txt b/yweb/robot/dbscheeme/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..c06e393a77
--- /dev/null
+++ b/yweb/robot/dbscheeme/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,32 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+find_package(ZLIB REQUIRED)
+
+add_library(yweb-robot-dbscheeme)
+target_link_libraries(yweb-robot-dbscheeme PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ ZLIB::ZLIB
+ kernel-hosts-owner
+ kernel-langregion
+ kernel-urlnorm
+ library-cpp-charset
+ cpp-deprecated-autoarray
+ cpp-digest-old_crc
+ library-cpp-microbdb
+ cpp-mime-types
+ library-cpp-robots_txt
+ yweb-config
+ yweb-protos
+ yweb-urlfilter
+)
+target_sources(yweb-robot-dbscheeme PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/robot/dbscheeme/extrecords.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/robot/dbscheeme/lib.cpp
+)
diff --git a/yweb/robot/dbscheeme/CMakeLists.linux-aarch64.txt b/yweb/robot/dbscheeme/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..04638ac386
--- /dev/null
+++ b/yweb/robot/dbscheeme/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,33 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+find_package(ZLIB REQUIRED)
+
+add_library(yweb-robot-dbscheeme)
+target_link_libraries(yweb-robot-dbscheeme PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ ZLIB::ZLIB
+ kernel-hosts-owner
+ kernel-langregion
+ kernel-urlnorm
+ library-cpp-charset
+ cpp-deprecated-autoarray
+ cpp-digest-old_crc
+ library-cpp-microbdb
+ cpp-mime-types
+ library-cpp-robots_txt
+ yweb-config
+ yweb-protos
+ yweb-urlfilter
+)
+target_sources(yweb-robot-dbscheeme PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/robot/dbscheeme/extrecords.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/robot/dbscheeme/lib.cpp
+)
diff --git a/yweb/robot/dbscheeme/CMakeLists.linux-x86_64.txt b/yweb/robot/dbscheeme/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..04638ac386
--- /dev/null
+++ b/yweb/robot/dbscheeme/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,33 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+find_package(ZLIB REQUIRED)
+
+add_library(yweb-robot-dbscheeme)
+target_link_libraries(yweb-robot-dbscheeme PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ ZLIB::ZLIB
+ kernel-hosts-owner
+ kernel-langregion
+ kernel-urlnorm
+ library-cpp-charset
+ cpp-deprecated-autoarray
+ cpp-digest-old_crc
+ library-cpp-microbdb
+ cpp-mime-types
+ library-cpp-robots_txt
+ yweb-config
+ yweb-protos
+ yweb-urlfilter
+)
+target_sources(yweb-robot-dbscheeme PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/robot/dbscheeme/extrecords.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/robot/dbscheeme/lib.cpp
+)
diff --git a/yweb/robot/dbscheeme/CMakeLists.txt b/yweb/robot/dbscheeme/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/yweb/robot/dbscheeme/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/yweb/robot/dbscheeme/CMakeLists.windows-x86_64.txt b/yweb/robot/dbscheeme/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..c06e393a77
--- /dev/null
+++ b/yweb/robot/dbscheeme/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,32 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+find_package(ZLIB REQUIRED)
+
+add_library(yweb-robot-dbscheeme)
+target_link_libraries(yweb-robot-dbscheeme PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ ZLIB::ZLIB
+ kernel-hosts-owner
+ kernel-langregion
+ kernel-urlnorm
+ library-cpp-charset
+ cpp-deprecated-autoarray
+ cpp-digest-old_crc
+ library-cpp-microbdb
+ cpp-mime-types
+ library-cpp-robots_txt
+ yweb-config
+ yweb-protos
+ yweb-urlfilter
+)
+target_sources(yweb-robot-dbscheeme PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/robot/dbscheeme/extrecords.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/robot/dbscheeme/lib.cpp
+)
diff --git a/yweb/robot/kiwi_queries/CMakeLists.txt b/yweb/robot/kiwi_queries/CMakeLists.txt
new file mode 100644
index 0000000000..82d0377971
--- /dev/null
+++ b/yweb/robot/kiwi_queries/CMakeLists.txt
@@ -0,0 +1,9 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(others)
diff --git a/yweb/robot/kiwi_queries/others/CMakeLists.txt b/yweb/robot/kiwi_queries/others/CMakeLists.txt
new file mode 100644
index 0000000000..ea6c677000
--- /dev/null
+++ b/yweb/robot/kiwi_queries/others/CMakeLists.txt
@@ -0,0 +1,9 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(lib)
diff --git a/yweb/robot/kiwi_queries/others/lib/CMakeLists.txt b/yweb/robot/kiwi_queries/others/lib/CMakeLists.txt
new file mode 100644
index 0000000000..d300d6d8a9
--- /dev/null
+++ b/yweb/robot/kiwi_queries/others/lib/CMakeLists.txt
@@ -0,0 +1,9 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(indexannportion)
diff --git a/yweb/robot/kiwi_queries/others/lib/indexannportion/CMakeLists.txt b/yweb/robot/kiwi_queries/others/lib/indexannportion/CMakeLists.txt
new file mode 100644
index 0000000000..1600e018b8
--- /dev/null
+++ b/yweb/robot/kiwi_queries/others/lib/indexannportion/CMakeLists.txt
@@ -0,0 +1,9 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(input)
diff --git a/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.darwin-x86_64.txt b/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..4919e5c2a0
--- /dev/null
+++ b/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,43 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(lib-indexannportion-input)
+target_link_libraries(lib-indexannportion-input PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(lib-indexannportion-input PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/robot/kiwi_queries/others/lib/indexannportion/input/annotations.proto
+)
+target_proto_addincls(lib-indexannportion-input
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(lib-indexannportion-input
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.linux-aarch64.txt b/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..e25b79e730
--- /dev/null
+++ b/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,44 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(lib-indexannportion-input)
+target_link_libraries(lib-indexannportion-input PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(lib-indexannportion-input PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/robot/kiwi_queries/others/lib/indexannportion/input/annotations.proto
+)
+target_proto_addincls(lib-indexannportion-input
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(lib-indexannportion-input
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.linux-x86_64.txt b/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..e25b79e730
--- /dev/null
+++ b/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,44 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(lib-indexannportion-input)
+target_link_libraries(lib-indexannportion-input PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(lib-indexannportion-input PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/robot/kiwi_queries/others/lib/indexannportion/input/annotations.proto
+)
+target_proto_addincls(lib-indexannportion-input
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(lib-indexannportion-input
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.txt b/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.windows-x86_64.txt b/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..4919e5c2a0
--- /dev/null
+++ b/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,43 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(lib-indexannportion-input)
+target_link_libraries(lib-indexannportion-input PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(lib-indexannportion-input PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/robot/kiwi_queries/others/lib/indexannportion/input/annotations.proto
+)
+target_proto_addincls(lib-indexannportion-input
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(lib-indexannportion-input
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/yweb/urlfilter/CMakeLists.darwin-x86_64.txt b/yweb/urlfilter/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..fa1d7a24b2
--- /dev/null
+++ b/yweb/urlfilter/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,27 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(yweb-urlfilter)
+target_link_libraries(yweb-urlfilter PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ cpp-deprecated-autoarray
+ cpp-deprecated-datafile
+ cpp-deprecated-fgood
+ cpp-regex-glob
+ cpp-regex-pcre
+ cpp-string_utils-url
+ library-cpp-uri
+)
+target_sources(yweb-urlfilter PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/urlfilter/hide.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/urlfilter/urlfilter.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/urlfilter/findboard.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/urlfilter/urlfilter_builder.cpp
+)
diff --git a/yweb/urlfilter/CMakeLists.linux-aarch64.txt b/yweb/urlfilter/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..2466f7e39a
--- /dev/null
+++ b/yweb/urlfilter/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,28 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(yweb-urlfilter)
+target_link_libraries(yweb-urlfilter PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ cpp-deprecated-autoarray
+ cpp-deprecated-datafile
+ cpp-deprecated-fgood
+ cpp-regex-glob
+ cpp-regex-pcre
+ cpp-string_utils-url
+ library-cpp-uri
+)
+target_sources(yweb-urlfilter PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/urlfilter/hide.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/urlfilter/urlfilter.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/urlfilter/findboard.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/urlfilter/urlfilter_builder.cpp
+)
diff --git a/yweb/urlfilter/CMakeLists.linux-x86_64.txt b/yweb/urlfilter/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..2466f7e39a
--- /dev/null
+++ b/yweb/urlfilter/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,28 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(yweb-urlfilter)
+target_link_libraries(yweb-urlfilter PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ cpp-deprecated-autoarray
+ cpp-deprecated-datafile
+ cpp-deprecated-fgood
+ cpp-regex-glob
+ cpp-regex-pcre
+ cpp-string_utils-url
+ library-cpp-uri
+)
+target_sources(yweb-urlfilter PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/urlfilter/hide.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/urlfilter/urlfilter.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/urlfilter/findboard.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/urlfilter/urlfilter_builder.cpp
+)
diff --git a/yweb/urlfilter/CMakeLists.txt b/yweb/urlfilter/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/yweb/urlfilter/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/yweb/urlfilter/CMakeLists.windows-x86_64.txt b/yweb/urlfilter/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..fa1d7a24b2
--- /dev/null
+++ b/yweb/urlfilter/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,27 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(yweb-urlfilter)
+target_link_libraries(yweb-urlfilter PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ cpp-deprecated-autoarray
+ cpp-deprecated-datafile
+ cpp-deprecated-fgood
+ cpp-regex-glob
+ cpp-regex-pcre
+ cpp-string_utils-url
+ library-cpp-uri
+)
+target_sources(yweb-urlfilter PRIVATE
+ ${CMAKE_SOURCE_DIR}/yweb/urlfilter/hide.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/urlfilter/urlfilter.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/urlfilter/findboard.cpp
+ ${CMAKE_SOURCE_DIR}/yweb/urlfilter/urlfilter_builder.cpp
+)
diff --git a/zora/CMakeLists.txt b/zora/CMakeLists.txt
new file mode 100644
index 0000000000..6da313a87f
--- /dev/null
+++ b/zora/CMakeLists.txt
@@ -0,0 +1,9 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(zora)
diff --git a/zora/zora/CMakeLists.txt b/zora/zora/CMakeLists.txt
new file mode 100644
index 0000000000..499930c4b0
--- /dev/null
+++ b/zora/zora/CMakeLists.txt
@@ -0,0 +1,9 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(proto)
diff --git a/zora/zora/proto/CMakeLists.txt b/zora/zora/proto/CMakeLists.txt
new file mode 100644
index 0000000000..867161a12c
--- /dev/null
+++ b/zora/zora/proto/CMakeLists.txt
@@ -0,0 +1,9 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(common)
diff --git a/zora/zora/proto/common/CMakeLists.darwin-x86_64.txt b/zora/zora/proto/common/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..c7ebbe0c2b
--- /dev/null
+++ b/zora/zora/proto/common/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,43 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(zora-proto-common)
+target_link_libraries(zora-proto-common PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(zora-proto-common PRIVATE
+ ${CMAKE_SOURCE_DIR}/zora/zora/proto/common/common.proto
+)
+target_proto_addincls(zora-proto-common
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(zora-proto-common
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/zora/zora/proto/common/CMakeLists.linux-aarch64.txt b/zora/zora/proto/common/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..8144cc9077
--- /dev/null
+++ b/zora/zora/proto/common/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,44 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(zora-proto-common)
+target_link_libraries(zora-proto-common PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(zora-proto-common PRIVATE
+ ${CMAKE_SOURCE_DIR}/zora/zora/proto/common/common.proto
+)
+target_proto_addincls(zora-proto-common
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(zora-proto-common
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/zora/zora/proto/common/CMakeLists.linux-x86_64.txt b/zora/zora/proto/common/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..8144cc9077
--- /dev/null
+++ b/zora/zora/proto/common/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,44 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(zora-proto-common)
+target_link_libraries(zora-proto-common PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(zora-proto-common PRIVATE
+ ${CMAKE_SOURCE_DIR}/zora/zora/proto/common/common.proto
+)
+target_proto_addincls(zora-proto-common
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(zora-proto-common
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)
diff --git a/zora/zora/proto/common/CMakeLists.txt b/zora/zora/proto/common/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/zora/zora/proto/common/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/zora/zora/proto/common/CMakeLists.windows-x86_64.txt b/zora/zora/proto/common/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..c7ebbe0c2b
--- /dev/null
+++ b/zora/zora/proto/common/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,43 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+
+add_library(zora-proto-common)
+target_link_libraries(zora-proto-common PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_proto_messages(zora-proto-common PRIVATE
+ ${CMAKE_SOURCE_DIR}/zora/zora/proto/common/common.proto
+)
+target_proto_addincls(zora-proto-common
+ ./
+ ${CMAKE_SOURCE_DIR}/
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src
+)
+target_proto_outs(zora-proto-common
+ --cpp_out=${CMAKE_BINARY_DIR}/
+ --cpp_styleguide_out=${CMAKE_BINARY_DIR}/
+)